From c404e0dc2c843b154f9a36c3aec10d0a715d88eb Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 30 Jan 2014 16:46:55 +0800 Subject: Btrfs: fix use-after-free in the finishing procedure of the device replace During a device replace test, we hit a null pointer dereference (it was very easy to reproduce by running xfstests' btrfs/011 on devices with the virtio scsi driver). There were two bugs that caused this problem: - We might allocate new chunks on the replaced device after we updated the mapping tree, and we forgot to replace the source device in the mappings of those new chunks. - We might get mapping information that includes the source device before the mapping information is updated, and then submit a bio based on that mapping information after we freed the source device. For the first bug, we can fix it by doing the mapping tree update and the source device removal in the same context of the chunk mutex. The chunk mutex is used to protect the allocable device list, so the above method prevents new chunk allocation in between, and after we remove the source device, all new chunks will be allocated on the new device. So it fixes the first bug. For the second bug, we need to make sure all in-flight bios are finished and no new bios are produced while we are removing the source device. To fix this problem, we introduce a global @bio_counter; we not only inc/dec @bio_counter outside of map_blocks, but also inc it before submitting a bio and dec @bio_counter when ending bios. Since Raid56 is a little different and device replace doesn't support raid56 yet, it is not addressed in this patch, and I added comments to make sure we will fix it in the future. 
Reported-by: Qu Wenruo Signed-off-by: Wang Shilong Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fcf367581073..0cafacb07b43 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2136,10 +2136,16 @@ int open_ctree(struct super_block *sb, goto fail_dirty_metadata_bytes; } + ret = percpu_counter_init(&fs_info->bio_counter, 0); + if (ret) { + err = ret; + goto fail_delalloc_bytes; + } + fs_info->btree_inode = new_inode(sb); if (!fs_info->btree_inode) { err = -ENOMEM; - goto fail_delalloc_bytes; + goto fail_bio_counter; } mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -2214,6 +2220,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->scrub_pause_req, 0); atomic_set(&fs_info->scrubs_paused, 0); atomic_set(&fs_info->scrub_cancel_req, 0); + init_waitqueue_head(&fs_info->replace_wait); init_waitqueue_head(&fs_info->scrub_pause_wait); fs_info->scrub_workers_refcnt = 0; #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY @@ -2966,6 +2973,8 @@ fail_iput: btrfs_mapping_tree_free(&fs_info->mapping_tree); iput(fs_info->btree_inode); +fail_bio_counter: + percpu_counter_destroy(&fs_info->bio_counter); fail_delalloc_bytes: percpu_counter_destroy(&fs_info->delalloc_bytes); fail_dirty_metadata_bytes: @@ -3613,6 +3622,7 @@ int close_ctree(struct btrfs_root *root) percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); + percpu_counter_destroy(&fs_info->bio_counter); bdi_destroy(&fs_info->bdi); cleanup_srcu_struct(&fs_info->subvol_srcu); -- cgit v1.2.1 From f88ba6a2a44ee98e8d59654463dc157bb6d13c43 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 5 Feb 2014 16:34:38 +0900 Subject: Btrfs: skip submitting barrier for missing device I got an error on v3.13: BTRFS error (device sdf1) in write_all_supers:3378: errno=-5 IO 
failure (errors while submitting device barriers.) how to reproduce: > mkfs.btrfs -f -d raid1 /dev/sdf1 /dev/sdf2 > wipefs -a /dev/sdf2 > mount -o degraded /dev/sdf1 /mnt > btrfs balance start -f -sconvert=single -mconvert=single -dconvert=single /mnt The reason of the error is that barrier_all_devices() failed to submit barrier to the missing device. However it is clear that we cannot do anything on missing device, and also it is not necessary to care chunks on the missing device. This patch stops sending/waiting barrier if device is missing. Signed-off-by: Hidetoshi Seto Cc: Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0cafacb07b43..74c9be89fc0c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3256,6 +3256,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) /* send down all the barriers */ head = &info->fs_devices->devices; list_for_each_entry_rcu(dev, head, dev_list) { + if (dev->missing) + continue; if (!dev->bdev) { errors_send++; continue; @@ -3270,6 +3272,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) /* wait for all the barriers */ list_for_each_entry_rcu(dev, head, dev_list) { + if (dev->missing) + continue; if (!dev->bdev) { errors_wait++; continue; -- cgit v1.2.1 From 2a85d9cac160bb5b845985a60007cc8348d77def Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 10 Feb 2014 17:07:16 +0800 Subject: Btrfs: fix possible deadlock in btrfs_cleanup_transaction [13654.480669] ====================================================== [13654.480905] [ INFO: possible circular locking dependency detected ] [13654.481003] 3.12.0+ #4 Tainted: G W O [13654.481060] ------------------------------------------------------- [13654.481060] btrfs-transacti/9347 is trying to acquire lock: [13654.481060] (&(&root->ordered_extent_lock)->rlock){+.+...}, at: [] btrfs_cleanup_transaction+0x271/0x570 [btrfs] 
[13654.481060] but task is already holding lock: [13654.481060] (&(&fs_info->ordered_root_lock)->rlock){+.+...}, at: [] btrfs_cleanup_transaction+0x1e5/0x570 [btrfs] [13654.481060] which lock already depends on the new lock. [13654.481060] the existing dependency chain (in reverse order) is: [13654.481060] -> #1 (&(&fs_info->ordered_root_lock)->rlock){+.+...}: [13654.481060] [] lock_acquire+0x93/0x130 [13654.481060] [] _raw_spin_lock+0x41/0x50 [13654.481060] [] __btrfs_add_ordered_extent+0x39b/0x450 [btrfs] [13654.481060] [] btrfs_add_ordered_extent+0x32/0x40 [btrfs] [13654.481060] [] run_delalloc_nocow+0x78a/0x9d0 [btrfs] [13654.481060] [] run_delalloc_range+0x31d/0x390 [btrfs] [13654.481060] [] __extent_writepage+0x310/0x780 [btrfs] [13654.481060] [] extent_write_cache_pages.isra.29.constprop.48+0x29a/0x410 [btrfs] [13654.481060] [] extent_writepages+0x4d/0x70 [btrfs] [13654.481060] [] btrfs_writepages+0x28/0x30 [btrfs] [13654.481060] [] do_writepages+0x21/0x50 [13654.481060] [] __filemap_fdatawrite_range+0x59/0x60 [13654.481060] [] filemap_fdatawrite_range+0x13/0x20 [13654.481060] [] btrfs_wait_ordered_range+0x49/0x140 [btrfs] [13654.481060] [] __btrfs_write_out_cache+0x682/0x8b0 [btrfs] [13654.481060] [] btrfs_write_out_cache+0x8d/0xe0 [btrfs] [13654.481060] [] btrfs_write_dirty_block_groups+0x593/0x680 [btrfs] [13654.481060] [] commit_cowonly_roots+0x14b/0x20d [btrfs] [13654.481060] [] btrfs_commit_transaction+0x43a/0x9d0 [btrfs] [13654.481060] [] btrfs_create_uuid_tree+0x5a/0x100 [btrfs] [13654.481060] [] open_ctree+0x21da/0x2210 [btrfs] [13654.481060] [] btrfs_mount+0x68e/0x870 [btrfs] [13654.481060] [] mount_fs+0x39/0x1b0 [13654.481060] [] vfs_kern_mount+0x63/0xf0 [13654.481060] [] do_mount+0x23e/0xa90 [13654.481060] [] SyS_mount+0x83/0xc0 [13654.481060] [] system_call_fastpath+0x16/0x1b [13654.481060] -> #0 (&(&root->ordered_extent_lock)->rlock){+.+...}: [13654.481060] [] __lock_acquire+0x150a/0x1a70 [13654.481060] [] lock_acquire+0x93/0x130 [13654.481060] 
[] _raw_spin_lock+0x41/0x50 [13654.481060] [] btrfs_cleanup_transaction+0x271/0x570 [btrfs] [13654.481060] [] transaction_kthread+0x22e/0x270 [btrfs] [13654.481060] [] kthread+0xea/0xf0 [13654.481060] [] ret_from_fork+0x7c/0xb0 [13654.481060] other info that might help us debug this: [13654.481060] Possible unsafe locking scenario: [13654.481060] CPU0 CPU1 [13654.481060] ---- ---- [13654.481060] lock(&(&fs_info->ordered_root_lock)->rlock); [13654.481060] lock(&(&root->ordered_extent_lock)->rlock); [13654.481060] lock(&(&fs_info->ordered_root_lock)->rlock); [13654.481060] lock(&(&root->ordered_extent_lock)->rlock); [13654.481060] *** DEADLOCK *** [...] ====================================================== btrfs_destroy_all_ordered_extents() gets &fs_info->ordered_root_lock __BEFORE__ acquiring &root->ordered_extent_lock, while btrfs_[add,remove]_ordered_extent() acquires &fs_info->ordered_root_lock __AFTER__ getting &root->ordered_extent_lock. This patch fixes the above problem. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 74c9be89fc0c..cc1b4237dc62 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3808,9 +3808,11 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) list_move_tail(&root->ordered_root, &fs_info->ordered_roots); + spin_unlock(&fs_info->ordered_root_lock); btrfs_destroy_ordered_extents(root); - cond_resched_lock(&fs_info->ordered_root_lock); + cond_resched(); + spin_lock(&fs_info->ordered_root_lock); } spin_unlock(&fs_info->ordered_root_lock); } -- cgit v1.2.1 From 8b050d350c7846462a21e9e054c9154ede9b43cf Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 20 Feb 2014 18:08:58 +0800 Subject: Btrfs: fix skipped error handle when log sync failed It is possible that many tasks sync the log tree at the same time, but only one task can do the sync 
work, the others will wait for it. But those waiting tasks didn't get the result of the log sync, and returned 0 when they finished waiting. This caused those tasks to skip the error handling, and the serious problem was that they told the users the file sync succeeded when in fact it had failed. This patch fixes this problem by introducing a log context structure, which we insert into a global list. When the sync fails, we set the error number of every log context in the list; the waiting tasks then get the error number from their log context and handle the error if needed. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index cc1b4237dc62..44f52d280b7d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1200,6 +1200,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, init_waitqueue_head(&root->log_writer_wait); init_waitqueue_head(&root->log_commit_wait[0]); init_waitqueue_head(&root->log_commit_wait[1]); + INIT_LIST_HEAD(&root->log_ctxs[0]); + INIT_LIST_HEAD(&root->log_ctxs[1]); atomic_set(&root->log_commit[0], 0); atomic_set(&root->log_commit[1], 0); atomic_set(&root->log_writers, 0); -- cgit v1.2.1 From d1433debe7f4346cf9fc0dafc71c3137d2a97bc4 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 20 Feb 2014 18:08:59 +0800 Subject: Btrfs: just wait or commit our own log sub-transaction We might commit a log sub-transaction which didn't contain the metadata we logged. This was because we didn't record the log transid and just selected the current log sub-transaction to commit, but the right one might have been committed by another task already. In that case we needn't do anything, and it is safe to return directly. This patch improves the log sync based on the above idea. 
We record the transid of the log sub-transaction in which we log the metadata, and the transid of the log sub-transaction we have committed. If the committed transid is >= the transid we record when logging the metadata, we just go back. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 44f52d280b7d..dd52146035b3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1209,6 +1209,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, atomic_set(&root->orphan_inodes, 0); atomic_set(&root->refs, 1); root->log_transid = 0; + root->log_transid_committed = -1; root->last_log_commit = 0; if (fs_info) extent_io_tree_init(&root->dirty_log_pages, @@ -1422,6 +1423,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, WARN_ON(root->log_root); root->log_root = log_root; root->log_transid = 0; + root->log_transid_committed = -1; root->last_log_commit = 0; return 0; } -- cgit v1.2.1 From 5cdc7ad337fb08f630ac3538fb10e4a75de2572d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:06 +0800 Subject: btrfs: Replace fs_info->workers with btrfs_workqueue. 
Use the newly created btrfs_workqueue_struct to replace the original fs_info->workers Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dd52146035b3..faafa5153fbd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -108,7 +108,7 @@ struct async_submit_bio { * can't tell us where in the file the bio should go */ u64 bio_offset; - struct btrfs_work work; + struct btrfs_work_struct work; int error; }; @@ -738,12 +738,12 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) { unsigned long limit = min_t(unsigned long, - info->workers.max_workers, + info->thread_pool_size, info->fs_devices->open_devices); return 256 * limit; } -static void run_one_async_start(struct btrfs_work *work) +static void run_one_async_start(struct btrfs_work_struct *work) { struct async_submit_bio *async; int ret; @@ -756,7 +756,7 @@ static void run_one_async_start(struct btrfs_work *work) async->error = ret; } -static void run_one_async_done(struct btrfs_work *work) +static void run_one_async_done(struct btrfs_work_struct *work) { struct btrfs_fs_info *fs_info; struct async_submit_bio *async; @@ -783,7 +783,7 @@ static void run_one_async_done(struct btrfs_work *work) async->bio_offset); } -static void run_one_async_free(struct btrfs_work *work) +static void run_one_async_free(struct btrfs_work_struct *work) { struct async_submit_bio *async; @@ -811,11 +811,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->submit_bio_start = submit_bio_start; async->submit_bio_done = submit_bio_done; - async->work.func = run_one_async_start; - async->work.ordered_func = run_one_async_done; - async->work.ordered_free = run_one_async_free; + 
btrfs_init_work(&async->work, run_one_async_start, + run_one_async_done, run_one_async_free); - async->work.flags = 0; async->bio_flags = bio_flags; async->bio_offset = bio_offset; @@ -824,9 +822,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, atomic_inc(&fs_info->nr_async_submits); if (rw & REQ_SYNC) - btrfs_set_work_high_prio(&async->work); + btrfs_set_work_high_priority(&async->work); - btrfs_queue_worker(&fs_info->workers, &async->work); + btrfs_queue_work(fs_info->workers, &async->work); while (atomic_read(&fs_info->async_submit_draining) && atomic_read(&fs_info->nr_async_submits)) { @@ -2000,7 +1998,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_stop_workers(&fs_info->generic_worker); btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->delalloc_workers); - btrfs_stop_workers(&fs_info->workers); + btrfs_destroy_workqueue(fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_meta_workers); btrfs_stop_workers(&fs_info->endio_raid56_workers); @@ -2104,6 +2102,8 @@ int open_ctree(struct super_block *sb, int err = -EINVAL; int num_backups_tried = 0; int backup_index = 0; + int max_active; + int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; bool create_uuid_tree; bool check_uuid_tree; @@ -2472,12 +2472,13 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } + max_active = fs_info->thread_pool_size; btrfs_init_workers(&fs_info->generic_worker, "genwork", 1, NULL); - btrfs_init_workers(&fs_info->workers, "worker", - fs_info->thread_pool_size, - &fs_info->generic_worker); + fs_info->workers = + btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, + max_active, 16); btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", fs_info->thread_pool_size, NULL); @@ -2498,9 +2499,6 @@ int open_ctree(struct super_block *sb, */ fs_info->submit_workers.idle_thresh = 64; - fs_info->workers.idle_thresh = 16; - fs_info->workers.ordered = 1; 
- fs_info->delalloc_workers.idle_thresh = 2; fs_info->delalloc_workers.ordered = 1; @@ -2552,8 +2550,7 @@ int open_ctree(struct super_block *sb, * btrfs_start_workers can really only fail because of ENOMEM so just * return -ENOMEM if any of these fail. */ - ret = btrfs_start_workers(&fs_info->workers); - ret |= btrfs_start_workers(&fs_info->generic_worker); + ret = btrfs_start_workers(&fs_info->generic_worker); ret |= btrfs_start_workers(&fs_info->submit_workers); ret |= btrfs_start_workers(&fs_info->delalloc_workers); ret |= btrfs_start_workers(&fs_info->fixup_workers); @@ -2573,6 +2570,10 @@ int open_ctree(struct super_block *sb, err = -ENOMEM; goto fail_sb_buffer; } + if (!(fs_info->workers)) { + err = -ENOMEM; + goto fail_sb_buffer; + } fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, -- cgit v1.2.1 From afe3d24267926eb78ba863016bdd65cfe718aef5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:07 +0800 Subject: btrfs: Replace fs_info->delalloc_workers with btrfs_workqueue Much like the fs_info->workers, replace the fs_info->delalloc_workers use the same btrfs_workqueue. 
Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index faafa5153fbd..7eeb45f649bf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1997,7 +1997,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) { btrfs_stop_workers(&fs_info->generic_worker); btrfs_stop_workers(&fs_info->fixup_workers); - btrfs_stop_workers(&fs_info->delalloc_workers); + btrfs_destroy_workqueue(fs_info->delalloc_workers); btrfs_destroy_workqueue(fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_meta_workers); @@ -2480,8 +2480,8 @@ int open_ctree(struct super_block *sb, btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, max_active, 16); - btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", - fs_info->thread_pool_size, NULL); + fs_info->delalloc_workers = + btrfs_alloc_workqueue("delalloc", flags, max_active, 2); btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", fs_info->thread_pool_size, NULL); @@ -2499,9 +2499,6 @@ int open_ctree(struct super_block *sb, */ fs_info->submit_workers.idle_thresh = 64; - fs_info->delalloc_workers.idle_thresh = 2; - fs_info->delalloc_workers.ordered = 1; - btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, &fs_info->generic_worker); btrfs_init_workers(&fs_info->endio_workers, "endio", @@ -2552,7 +2549,6 @@ int open_ctree(struct super_block *sb, */ ret = btrfs_start_workers(&fs_info->generic_worker); ret |= btrfs_start_workers(&fs_info->submit_workers); - ret |= btrfs_start_workers(&fs_info->delalloc_workers); ret |= btrfs_start_workers(&fs_info->fixup_workers); ret |= btrfs_start_workers(&fs_info->endio_workers); ret |= btrfs_start_workers(&fs_info->endio_meta_workers); @@ -2570,7 +2566,7 @@ int open_ctree(struct super_block *sb, err = -ENOMEM; goto fail_sb_buffer; } 
- if (!(fs_info->workers)) { + if (!(fs_info->workers && fs_info->delalloc_workers)) { err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.1 From a8c93d4ef6f6727764a61a2ee1c1878a755637c5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:08 +0800 Subject: btrfs: Replace fs_info->submit_workers with btrfs_workqueue. Much like the fs_info->workers, replace the fs_info->submit_workers use the same btrfs_workqueue. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7eeb45f649bf..420328bacf49 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2006,7 +2006,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_stop_workers(&fs_info->endio_meta_write_workers); btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->endio_freespace_worker); - btrfs_stop_workers(&fs_info->submit_workers); + btrfs_destroy_workqueue(fs_info->submit_workers); btrfs_stop_workers(&fs_info->delayed_workers); btrfs_stop_workers(&fs_info->caching_workers); btrfs_stop_workers(&fs_info->readahead_workers); @@ -2486,18 +2486,19 @@ int open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", fs_info->thread_pool_size, NULL); - btrfs_init_workers(&fs_info->submit_workers, "submit", - min_t(u64, fs_devices->num_devices, - fs_info->thread_pool_size), NULL); btrfs_init_workers(&fs_info->caching_workers, "cache", fs_info->thread_pool_size, NULL); - /* a higher idle thresh on the submit workers makes it much more + /* + * a higher idle thresh on the submit workers makes it much more * likely that bios will be send down in a sane order to the * devices */ - fs_info->submit_workers.idle_thresh = 64; + fs_info->submit_workers = + btrfs_alloc_workqueue("submit", flags, + min_t(u64, 
fs_devices->num_devices, + max_active), 64); btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, &fs_info->generic_worker); @@ -2548,7 +2549,6 @@ int open_ctree(struct super_block *sb, * return -ENOMEM if any of these fail. */ ret = btrfs_start_workers(&fs_info->generic_worker); - ret |= btrfs_start_workers(&fs_info->submit_workers); ret |= btrfs_start_workers(&fs_info->fixup_workers); ret |= btrfs_start_workers(&fs_info->endio_workers); ret |= btrfs_start_workers(&fs_info->endio_meta_workers); @@ -2566,7 +2566,8 @@ int open_ctree(struct super_block *sb, err = -ENOMEM; goto fail_sb_buffer; } - if (!(fs_info->workers && fs_info->delalloc_workers)) { + if (!(fs_info->workers && fs_info->delalloc_workers && + fs_info->submit_workers)) { err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.1 From a44903abe9dc23ffa305898368a7a910dbae13c5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:09 +0800 Subject: btrfs: Replace fs_info->flush_workers with btrfs_workqueue. Replace the fs_info->flush_workers with the newly created btrfs_workqueue. 
Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 420328bacf49..5b82b0b31ec8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2010,7 +2010,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_stop_workers(&fs_info->delayed_workers); btrfs_stop_workers(&fs_info->caching_workers); btrfs_stop_workers(&fs_info->readahead_workers); - btrfs_stop_workers(&fs_info->flush_workers); + btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_stop_workers(&fs_info->qgroup_rescan_workers); } @@ -2483,9 +2483,8 @@ int open_ctree(struct super_block *sb, fs_info->delalloc_workers = btrfs_alloc_workqueue("delalloc", flags, max_active, 2); - btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", - fs_info->thread_pool_size, NULL); - + fs_info->flush_workers = + btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); btrfs_init_workers(&fs_info->caching_workers, "cache", fs_info->thread_pool_size, NULL); @@ -2560,14 +2559,13 @@ int open_ctree(struct super_block *sb, ret |= btrfs_start_workers(&fs_info->delayed_workers); ret |= btrfs_start_workers(&fs_info->caching_workers); ret |= btrfs_start_workers(&fs_info->readahead_workers); - ret |= btrfs_start_workers(&fs_info->flush_workers); ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); if (ret) { err = -ENOMEM; goto fail_sb_buffer; } if (!(fs_info->workers && fs_info->delalloc_workers && - fs_info->submit_workers)) { + fs_info->submit_workers && fs_info->flush_workers)) { err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.1 From fccb5d86d8f52161e013025ccf3101d8fab99a32 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:10 +0800 Subject: btrfs: Replace fs_info->endio_* workqueue with btrfs_workqueue. 
Replace the fs_info->endio_* workqueues with the newly created btrfs_workqueue. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 104 ++++++++++++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 58 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5b82b0b31ec8..8ce0214e3bac 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -55,7 +55,7 @@ #endif static struct extent_io_ops btree_extent_io_ops; -static void end_workqueue_fn(struct btrfs_work *work); +static void end_workqueue_fn(struct btrfs_work_struct *work); static void free_fs_root(struct btrfs_root *root); static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only); @@ -86,7 +86,7 @@ struct end_io_wq { int error; int metadata; struct list_head list; - struct btrfs_work work; + struct btrfs_work_struct work; }; /* @@ -678,32 +678,31 @@ static void end_workqueue_bio(struct bio *bio, int err) fs_info = end_io_wq->info; end_io_wq->error = err; - end_io_wq->work.func = end_workqueue_fn; - end_io_wq->work.flags = 0; + btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); if (bio->bi_rw & REQ_WRITE) { if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) - btrfs_queue_worker(&fs_info->endio_meta_write_workers, - &end_io_wq->work); + btrfs_queue_work(fs_info->endio_meta_write_workers, + &end_io_wq->work); else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) - btrfs_queue_worker(&fs_info->endio_freespace_worker, - &end_io_wq->work); + btrfs_queue_work(fs_info->endio_freespace_worker, + &end_io_wq->work); else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - btrfs_queue_worker(&fs_info->endio_raid56_workers, - &end_io_wq->work); + btrfs_queue_work(fs_info->endio_raid56_workers, + &end_io_wq->work); else - btrfs_queue_worker(&fs_info->endio_write_workers, - &end_io_wq->work); + btrfs_queue_work(fs_info->endio_write_workers, + 
&end_io_wq->work); } else { if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - btrfs_queue_worker(&fs_info->endio_raid56_workers, - &end_io_wq->work); + btrfs_queue_work(fs_info->endio_raid56_workers, + &end_io_wq->work); else if (end_io_wq->metadata) - btrfs_queue_worker(&fs_info->endio_meta_workers, - &end_io_wq->work); + btrfs_queue_work(fs_info->endio_meta_workers, + &end_io_wq->work); else - btrfs_queue_worker(&fs_info->endio_workers, - &end_io_wq->work); + btrfs_queue_work(fs_info->endio_workers, + &end_io_wq->work); } } @@ -1669,7 +1668,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) * called by the kthread helper functions to finally call the bio end_io * functions. This is where read checksum verification actually happens */ -static void end_workqueue_fn(struct btrfs_work *work) +static void end_workqueue_fn(struct btrfs_work_struct *work) { struct bio *bio; struct end_io_wq *end_io_wq; @@ -1999,13 +1998,13 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_stop_workers(&fs_info->fixup_workers); btrfs_destroy_workqueue(fs_info->delalloc_workers); btrfs_destroy_workqueue(fs_info->workers); - btrfs_stop_workers(&fs_info->endio_workers); - btrfs_stop_workers(&fs_info->endio_meta_workers); - btrfs_stop_workers(&fs_info->endio_raid56_workers); + btrfs_destroy_workqueue(fs_info->endio_workers); + btrfs_destroy_workqueue(fs_info->endio_meta_workers); + btrfs_destroy_workqueue(fs_info->endio_raid56_workers); btrfs_stop_workers(&fs_info->rmw_workers); - btrfs_stop_workers(&fs_info->endio_meta_write_workers); - btrfs_stop_workers(&fs_info->endio_write_workers); - btrfs_stop_workers(&fs_info->endio_freespace_worker); + btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); + btrfs_destroy_workqueue(fs_info->endio_write_workers); + btrfs_destroy_workqueue(fs_info->endio_freespace_worker); btrfs_destroy_workqueue(fs_info->submit_workers); btrfs_stop_workers(&fs_info->delayed_workers); 
btrfs_stop_workers(&fs_info->caching_workers); @@ -2501,26 +2500,26 @@ int open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, &fs_info->generic_worker); - btrfs_init_workers(&fs_info->endio_workers, "endio", - fs_info->thread_pool_size, - &fs_info->generic_worker); - btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", - fs_info->thread_pool_size, - &fs_info->generic_worker); - btrfs_init_workers(&fs_info->endio_meta_write_workers, - "endio-meta-write", fs_info->thread_pool_size, - &fs_info->generic_worker); - btrfs_init_workers(&fs_info->endio_raid56_workers, - "endio-raid56", fs_info->thread_pool_size, - &fs_info->generic_worker); + + /* + * endios are largely parallel and should have a very + * low idle thresh + */ + fs_info->endio_workers = + btrfs_alloc_workqueue("endio", flags, max_active, 4); + fs_info->endio_meta_workers = + btrfs_alloc_workqueue("endio-meta", flags, max_active, 4); + fs_info->endio_meta_write_workers = + btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); + fs_info->endio_raid56_workers = + btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); btrfs_init_workers(&fs_info->rmw_workers, "rmw", fs_info->thread_pool_size, &fs_info->generic_worker); - btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", - fs_info->thread_pool_size, - &fs_info->generic_worker); - btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", - 1, &fs_info->generic_worker); + fs_info->endio_write_workers = + btrfs_alloc_workqueue("endio-write", flags, max_active, 2); + fs_info->endio_freespace_worker = + btrfs_alloc_workqueue("freespace-write", flags, max_active, 0); btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", fs_info->thread_pool_size, &fs_info->generic_worker); @@ -2530,17 +2529,8 @@ int open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, &fs_info->generic_worker); - /* - * endios are largely 
parallel and should have a very - * low idle thresh - */ - fs_info->endio_workers.idle_thresh = 4; - fs_info->endio_meta_workers.idle_thresh = 4; - fs_info->endio_raid56_workers.idle_thresh = 4; fs_info->rmw_workers.idle_thresh = 2; - fs_info->endio_write_workers.idle_thresh = 2; - fs_info->endio_meta_write_workers.idle_thresh = 2; fs_info->readahead_workers.idle_thresh = 2; /* @@ -2549,13 +2539,7 @@ int open_ctree(struct super_block *sb, */ ret = btrfs_start_workers(&fs_info->generic_worker); ret |= btrfs_start_workers(&fs_info->fixup_workers); - ret |= btrfs_start_workers(&fs_info->endio_workers); - ret |= btrfs_start_workers(&fs_info->endio_meta_workers); ret |= btrfs_start_workers(&fs_info->rmw_workers); - ret |= btrfs_start_workers(&fs_info->endio_raid56_workers); - ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers); - ret |= btrfs_start_workers(&fs_info->endio_write_workers); - ret |= btrfs_start_workers(&fs_info->endio_freespace_worker); ret |= btrfs_start_workers(&fs_info->delayed_workers); ret |= btrfs_start_workers(&fs_info->caching_workers); ret |= btrfs_start_workers(&fs_info->readahead_workers); @@ -2565,7 +2549,11 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } if (!(fs_info->workers && fs_info->delalloc_workers && - fs_info->submit_workers && fs_info->flush_workers)) { + fs_info->submit_workers && fs_info->flush_workers && + fs_info->endio_workers && fs_info->endio_meta_workers && + fs_info->endio_meta_write_workers && + fs_info->endio_write_workers && fs_info->endio_raid56_workers && + fs_info->endio_freespace_worker)) { err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.1 From d05a33ac265c62d4be35788dd978b2665033f077 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:11 +0800 Subject: btrfs: Replace fs_info->rmw_workers workqueue with btrfs_workqueue. Replace the fs_info->rmw_workers with the newly created btrfs_workqueue. 
Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8ce0214e3bac..5f12806e96e8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2001,7 +2001,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->endio_workers); btrfs_destroy_workqueue(fs_info->endio_meta_workers); btrfs_destroy_workqueue(fs_info->endio_raid56_workers); - btrfs_stop_workers(&fs_info->rmw_workers); + btrfs_destroy_workqueue(fs_info->rmw_workers); btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); btrfs_destroy_workqueue(fs_info->endio_write_workers); btrfs_destroy_workqueue(fs_info->endio_freespace_worker); @@ -2513,9 +2513,8 @@ int open_ctree(struct super_block *sb, btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); fs_info->endio_raid56_workers = btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); - btrfs_init_workers(&fs_info->rmw_workers, - "rmw", fs_info->thread_pool_size, - &fs_info->generic_worker); + fs_info->rmw_workers = + btrfs_alloc_workqueue("rmw", flags, max_active, 2); fs_info->endio_write_workers = btrfs_alloc_workqueue("endio-write", flags, max_active, 2); fs_info->endio_freespace_worker = @@ -2529,8 +2528,6 @@ int open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, &fs_info->generic_worker); - fs_info->rmw_workers.idle_thresh = 2; - fs_info->readahead_workers.idle_thresh = 2; /* @@ -2539,7 +2536,6 @@ int open_ctree(struct super_block *sb, */ ret = btrfs_start_workers(&fs_info->generic_worker); ret |= btrfs_start_workers(&fs_info->fixup_workers); - ret |= btrfs_start_workers(&fs_info->rmw_workers); ret |= btrfs_start_workers(&fs_info->delayed_workers); ret |= btrfs_start_workers(&fs_info->caching_workers); ret |= 
btrfs_start_workers(&fs_info->readahead_workers); @@ -2553,7 +2549,7 @@ int open_ctree(struct super_block *sb, fs_info->endio_workers && fs_info->endio_meta_workers && fs_info->endio_meta_write_workers && fs_info->endio_write_workers && fs_info->endio_raid56_workers && - fs_info->endio_freespace_worker)) { + fs_info->endio_freespace_worker && fs_info->rmw_workers)) { err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.1 From e66f0bb14465371d4c86fa70cff2acc331efa1fb Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:12 +0800 Subject: btrfs: Replace fs_info->cache_workers workqueue with btrfs_workqueue. Replace the fs_info->cache_workers with the newly created btrfs_workqueue. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5f12806e96e8..9c14e3bd078c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2007,7 +2007,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->endio_freespace_worker); btrfs_destroy_workqueue(fs_info->submit_workers); btrfs_stop_workers(&fs_info->delayed_workers); - btrfs_stop_workers(&fs_info->caching_workers); + btrfs_destroy_workqueue(fs_info->caching_workers); btrfs_stop_workers(&fs_info->readahead_workers); btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_stop_workers(&fs_info->qgroup_rescan_workers); @@ -2485,8 +2485,8 @@ int open_ctree(struct super_block *sb, fs_info->flush_workers = btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); - btrfs_init_workers(&fs_info->caching_workers, "cache", - fs_info->thread_pool_size, NULL); + fs_info->caching_workers = + btrfs_alloc_workqueue("cache", flags, max_active, 0); /* * a higher idle thresh on the submit workers makes it much more @@ -2537,7 +2537,6 @@ int open_ctree(struct super_block *sb, ret = 
btrfs_start_workers(&fs_info->generic_worker); ret |= btrfs_start_workers(&fs_info->fixup_workers); ret |= btrfs_start_workers(&fs_info->delayed_workers); - ret |= btrfs_start_workers(&fs_info->caching_workers); ret |= btrfs_start_workers(&fs_info->readahead_workers); ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); if (ret) { @@ -2549,7 +2548,8 @@ int open_ctree(struct super_block *sb, fs_info->endio_workers && fs_info->endio_meta_workers && fs_info->endio_meta_write_workers && fs_info->endio_write_workers && fs_info->endio_raid56_workers && - fs_info->endio_freespace_worker && fs_info->rmw_workers)) { + fs_info->endio_freespace_worker && fs_info->rmw_workers && + fs_info->caching_workers)) { err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.1 From 736cfa15e89a654436d4149c109bf1ae09fc67cf Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:13 +0800 Subject: btrfs: Replace fs_info->readahead_workers workqueue with btrfs_workqueue. Replace the fs_info->readahead_workers with the newly created btrfs_workqueue. 
Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9c14e3bd078c..c0b003bb66cd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2008,7 +2008,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->submit_workers); btrfs_stop_workers(&fs_info->delayed_workers); btrfs_destroy_workqueue(fs_info->caching_workers); - btrfs_stop_workers(&fs_info->readahead_workers); + btrfs_destroy_workqueue(fs_info->readahead_workers); btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_stop_workers(&fs_info->qgroup_rescan_workers); } @@ -2522,14 +2522,11 @@ int open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", fs_info->thread_pool_size, &fs_info->generic_worker); - btrfs_init_workers(&fs_info->readahead_workers, "readahead", - fs_info->thread_pool_size, - &fs_info->generic_worker); + fs_info->readahead_workers = + btrfs_alloc_workqueue("readahead", flags, max_active, 2); btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, &fs_info->generic_worker); - fs_info->readahead_workers.idle_thresh = 2; - /* * btrfs_start_workers can really only fail because of ENOMEM so just * return -ENOMEM if any of these fail. 
@@ -2537,7 +2534,6 @@ int open_ctree(struct super_block *sb, ret = btrfs_start_workers(&fs_info->generic_worker); ret |= btrfs_start_workers(&fs_info->fixup_workers); ret |= btrfs_start_workers(&fs_info->delayed_workers); - ret |= btrfs_start_workers(&fs_info->readahead_workers); ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); if (ret) { err = -ENOMEM; @@ -2549,7 +2545,7 @@ int open_ctree(struct super_block *sb, fs_info->endio_meta_write_workers && fs_info->endio_write_workers && fs_info->endio_raid56_workers && fs_info->endio_freespace_worker && fs_info->rmw_workers && - fs_info->caching_workers)) { + fs_info->caching_workers && fs_info->readahead_workers)) { err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.1 From dc6e320998fb907e4c19032d545d461bfe5040d1 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:14 +0800 Subject: btrfs: Replace fs_info->fixup_workers workqueue with btrfs_workqueue. Replace the fs_info->fixup_workers with the newly created btrfs_workqueue. 
Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c0b003bb66cd..392cd3baefe4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1995,7 +1995,7 @@ static noinline int next_root_backup(struct btrfs_fs_info *info, static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) { btrfs_stop_workers(&fs_info->generic_worker); - btrfs_stop_workers(&fs_info->fixup_workers); + btrfs_destroy_workqueue(fs_info->fixup_workers); btrfs_destroy_workqueue(fs_info->delalloc_workers); btrfs_destroy_workqueue(fs_info->workers); btrfs_destroy_workqueue(fs_info->endio_workers); @@ -2498,8 +2498,8 @@ int open_ctree(struct super_block *sb, min_t(u64, fs_devices->num_devices, max_active), 64); - btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, - &fs_info->generic_worker); + fs_info->fixup_workers = + btrfs_alloc_workqueue("fixup", flags, 1, 0); /* * endios are largely parallel and should have a very @@ -2532,7 +2532,6 @@ int open_ctree(struct super_block *sb, * return -ENOMEM if any of these fail. 
*/ ret = btrfs_start_workers(&fs_info->generic_worker); - ret |= btrfs_start_workers(&fs_info->fixup_workers); ret |= btrfs_start_workers(&fs_info->delayed_workers); ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); if (ret) { @@ -2545,7 +2544,8 @@ int open_ctree(struct super_block *sb, fs_info->endio_meta_write_workers && fs_info->endio_write_workers && fs_info->endio_raid56_workers && fs_info->endio_freespace_worker && fs_info->rmw_workers && - fs_info->caching_workers && fs_info->readahead_workers)) { + fs_info->caching_workers && fs_info->readahead_workers && + fs_info->fixup_workers)) { err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.1 From 5b3bc44e2e69d42edf40ca3785040d233ca949f4 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:15 +0800 Subject: btrfs: Replace fs_info->delayed_workers workqueue with btrfs_workqueue. Replace the fs_info->delayed_workers with the newly created btrfs_workqueue. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 392cd3baefe4..f5da1fd23ee9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2006,7 +2006,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->endio_write_workers); btrfs_destroy_workqueue(fs_info->endio_freespace_worker); btrfs_destroy_workqueue(fs_info->submit_workers); - btrfs_stop_workers(&fs_info->delayed_workers); + btrfs_destroy_workqueue(fs_info->delayed_workers); btrfs_destroy_workqueue(fs_info->caching_workers); btrfs_destroy_workqueue(fs_info->readahead_workers); btrfs_destroy_workqueue(fs_info->flush_workers); @@ -2519,9 +2519,8 @@ int open_ctree(struct super_block *sb, btrfs_alloc_workqueue("endio-write", flags, max_active, 2); fs_info->endio_freespace_worker = btrfs_alloc_workqueue("freespace-write", flags, 
max_active, 0); - btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", - fs_info->thread_pool_size, - &fs_info->generic_worker); + fs_info->delayed_workers = + btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); fs_info->readahead_workers = btrfs_alloc_workqueue("readahead", flags, max_active, 2); btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, @@ -2532,7 +2531,6 @@ int open_ctree(struct super_block *sb, * return -ENOMEM if any of these fail. */ ret = btrfs_start_workers(&fs_info->generic_worker); - ret |= btrfs_start_workers(&fs_info->delayed_workers); ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); if (ret) { err = -ENOMEM; @@ -2545,7 +2543,7 @@ int open_ctree(struct super_block *sb, fs_info->endio_write_workers && fs_info->endio_raid56_workers && fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && - fs_info->fixup_workers)) { + fs_info->fixup_workers && fs_info->delayed_workers)) { err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.1 From fc97fab0ea59fb923cbe91b7d208ffc6f1d8a95c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:16 +0800 Subject: btrfs: Replace fs_info->qgroup_rescan_worker workqueue with btrfs_workqueue. Replace the fs_info->qgroup_rescan_worker with the newly created btrfs_workqueue. 
Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f5da1fd23ee9..9aaf9c309b54 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2010,7 +2010,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->caching_workers); btrfs_destroy_workqueue(fs_info->readahead_workers); btrfs_destroy_workqueue(fs_info->flush_workers); - btrfs_stop_workers(&fs_info->qgroup_rescan_workers); + btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); } static void free_root_extent_buffers(struct btrfs_root *root) @@ -2523,15 +2523,14 @@ int open_ctree(struct super_block *sb, btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); fs_info->readahead_workers = btrfs_alloc_workqueue("readahead", flags, max_active, 2); - btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, - &fs_info->generic_worker); + fs_info->qgroup_rescan_workers = + btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); /* * btrfs_start_workers can really only fail because of ENOMEM so just * return -ENOMEM if any of these fail. 
*/ ret = btrfs_start_workers(&fs_info->generic_worker); - ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); if (ret) { err = -ENOMEM; goto fail_sb_buffer; @@ -2543,7 +2542,8 @@ int open_ctree(struct super_block *sb, fs_info->endio_write_workers && fs_info->endio_raid56_workers && fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && - fs_info->fixup_workers && fs_info->delayed_workers)) { + fs_info->fixup_workers && fs_info->delayed_workers && + fs_info->qgroup_rescan_workers)) { err = -ENOMEM; goto fail_sb_buffer; } -- cgit v1.2.1 From a046e9c88b0f46677923864295eac7c92cd962cb Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:18 +0800 Subject: btrfs: Cleanup the old btrfs_worker. Since all the btrfs_worker instances have been replaced with the newly created btrfs_workqueue, the old code can be easily removed. Signed-off-by: Quwenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9aaf9c309b54..c80d9507171c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1994,7 +1994,6 @@ static noinline int next_root_backup(struct btrfs_fs_info *info, /* helper to cleanup workers */ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) { - btrfs_stop_workers(&fs_info->generic_worker); btrfs_destroy_workqueue(fs_info->fixup_workers); btrfs_destroy_workqueue(fs_info->delalloc_workers); btrfs_destroy_workqueue(fs_info->workers); @@ -2472,8 +2471,6 @@ int open_ctree(struct super_block *sb, } max_active = fs_info->thread_pool_size; - btrfs_init_workers(&fs_info->generic_worker, - "genwork", 1, NULL); fs_info->workers = btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, @@ -2526,15 +2523,6 @@ int open_ctree(struct super_block *sb, fs_info->qgroup_rescan_workers = btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); - /*
- * btrfs_start_workers can really only fail because of ENOMEM so just - * return -ENOMEM if any of these fail. - */ - ret = btrfs_start_workers(&fs_info->generic_worker); - if (ret) { - err = -ENOMEM; - goto fail_sb_buffer; - } if (!(fs_info->workers && fs_info->delalloc_workers && fs_info->submit_workers && fs_info->flush_workers && fs_info->endio_workers && fs_info->endio_meta_workers && -- cgit v1.2.1 From d458b0540ebd728b4d6ef47cc5ef0dbfd4dd361a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:19 +0800 Subject: btrfs: Cleanup the "_struct" suffix in btrfs_workequeue Since the "_struct" suffix is mainly used for distinguish the differnt btrfs_work between the original and the newly created one, there is no need using the suffix since all btrfs_workers are changed into btrfs_workqueue. Also this patch fixed some codes whose code style is changed due to the too long "_struct" suffix. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c80d9507171c..f7d84d955764 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -55,7 +55,7 @@ #endif static struct extent_io_ops btree_extent_io_ops; -static void end_workqueue_fn(struct btrfs_work_struct *work); +static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only); @@ -86,7 +86,7 @@ struct end_io_wq { int error; int metadata; struct list_head list; - struct btrfs_work_struct work; + struct btrfs_work work; }; /* @@ -108,7 +108,7 @@ struct async_submit_bio { * can't tell us where in the file the bio should go */ u64 bio_offset; - struct btrfs_work_struct work; + struct btrfs_work work; int error; }; @@ -742,7 +742,7 @@ unsigned long btrfs_async_submit_limit(struct 
btrfs_fs_info *info) return 256 * limit; } -static void run_one_async_start(struct btrfs_work_struct *work) +static void run_one_async_start(struct btrfs_work *work) { struct async_submit_bio *async; int ret; @@ -755,7 +755,7 @@ static void run_one_async_start(struct btrfs_work_struct *work) async->error = ret; } -static void run_one_async_done(struct btrfs_work_struct *work) +static void run_one_async_done(struct btrfs_work *work) { struct btrfs_fs_info *fs_info; struct async_submit_bio *async; @@ -782,7 +782,7 @@ static void run_one_async_done(struct btrfs_work_struct *work) async->bio_offset); } -static void run_one_async_free(struct btrfs_work_struct *work) +static void run_one_async_free(struct btrfs_work *work) { struct async_submit_bio *async; @@ -1668,7 +1668,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) * called by the kthread helper functions to finally call the bio end_io * functions. This is where read checksum verification actually happens */ -static void end_workqueue_fn(struct btrfs_work_struct *work) +static void end_workqueue_fn(struct btrfs_work *work) { struct bio *bio; struct end_io_wq *end_io_wq; -- cgit v1.2.1 From 8257b2dc3c1a1057b84a589827354abdc4c767fd Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 6 Mar 2014 13:38:19 +0800 Subject: Btrfs: introduce btrfs_{start, end}_nocow_write() for each subvolume If the snapshot creation happened after the nocow write but before the dirty data flush, we would fail to flush the dirty data because of no space. So we must keep track of when those nocow write operations start and when they end, if there are nocow writers, the snapshot creators must wait. In order to implement this function, I introduce btrfs_{start, end}_nocow_write(), which is similar to mnt_{want,drop}_write(). These two functions are only used for nocow file write operations. 
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f7d84d955764..7d09ca48c347 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1149,6 +1149,32 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, } } +static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) +{ + struct btrfs_subvolume_writers *writers; + int ret; + + writers = kmalloc(sizeof(*writers), GFP_NOFS); + if (!writers) + return ERR_PTR(-ENOMEM); + + ret = percpu_counter_init(&writers->counter, 0); + if (ret < 0) { + kfree(writers); + return ERR_PTR(ret); + } + + init_waitqueue_head(&writers->wait); + return writers; +} + +static void +btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) +{ + percpu_counter_destroy(&writers->counter); + kfree(writers); +} + static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -1205,6 +1231,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, atomic_set(&root->log_batch, 0); atomic_set(&root->orphan_inodes, 0); atomic_set(&root->refs, 1); + atomic_set(&root->will_be_snapshoted, 0); root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; @@ -1502,6 +1529,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, int btrfs_init_fs_root(struct btrfs_root *root) { int ret; + struct btrfs_subvolume_writers *writers; root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), @@ -1511,6 +1539,13 @@ int btrfs_init_fs_root(struct btrfs_root *root) goto fail; } + writers = btrfs_alloc_subvolume_writers(); + if (IS_ERR(writers)) { + ret = PTR_ERR(writers); + goto fail; + } + 
root->subv_writers = writers; + btrfs_init_free_ino_ctl(root); mutex_init(&root->fs_commit_mutex); spin_lock_init(&root->cache_lock); @@ -1518,8 +1553,11 @@ int btrfs_init_fs_root(struct btrfs_root *root) ret = get_anon_bdev(&root->anon_dev); if (ret) - goto fail; + goto free_writers; return 0; + +free_writers: + btrfs_free_subvolume_writers(root->subv_writers); fail: kfree(root->free_ino_ctl); kfree(root->free_ino_pinned); @@ -3459,6 +3497,8 @@ static void free_fs_root(struct btrfs_root *root) root->orphan_block_rsv = NULL; if (root->anon_dev) free_anon_bdev(root->anon_dev); + if (root->subv_writers) + btrfs_free_subvolume_writers(root->subv_writers); free_extent_buffer(root->node); free_extent_buffer(root->commit_root); kfree(root->free_ino_ctl); -- cgit v1.2.1 From 31f3d255c677073f83daa1e0671bbf2157bf8edc Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 6 Mar 2014 13:55:02 +0800 Subject: Btrfs: split the global ordered extents mutex When we create a snapshot, we just need to wait for the ordered extents in the source fs/file root, but because we use the global mutex to protect this ordered extents list of the source fs/file root to avoid accessing an empty list, if someone got the mutex to access the ordered extents list of the other fs/file root, we had to wait. This patch splits the above global mutex; now every fs/file root has its own mutex to protect its own list.
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7d09ca48c347..237b5b5a2200 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1220,6 +1220,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->log_extents_lock[1]); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); + mutex_init(&root->ordered_extent_mutex); init_waitqueue_head(&root->log_writer_wait); init_waitqueue_head(&root->log_commit_wait[0]); init_waitqueue_head(&root->log_commit_wait[1]); -- cgit v1.2.1 From 573bfb72f7608eb7097d2dd036a714a6ab20cffe Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 6 Mar 2014 13:55:03 +0800 Subject: Btrfs: fix possible empty list access when flushing the delalloc inodes We didn't have a lock to protect the access to the delalloc inodes list; that is, we might access an empty delalloc inodes list if someone started flushing delalloc inodes, because the delalloc inodes were moved into another list temporarily. Fix it by wrapping the access with a lock.
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 237b5b5a2200..d9698fda2d12 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1221,6 +1221,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); mutex_init(&root->ordered_extent_mutex); + mutex_init(&root->delalloc_mutex); init_waitqueue_head(&root->log_writer_wait); init_waitqueue_head(&root->log_commit_wait[0]); init_waitqueue_head(&root->log_commit_wait[1]); @@ -2209,6 +2210,7 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->buffer_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->reloc_mutex); + mutex_init(&fs_info->delalloc_root_mutex); seqlock_init(&fs_info->profiles_lock); init_completion(&fs_info->kobj_unregister); -- cgit v1.2.1