summaryrefslogtreecommitdiffstats
path: root/drivers/md/bcache
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/bcache')
-rw-r--r--drivers/md/bcache/Makefile2
-rw-r--r--drivers/md/bcache/alloc.c23
-rw-r--r--drivers/md/bcache/bcache.h9
-rw-r--r--drivers/md/bcache/bset.c22
-rw-r--r--drivers/md/bcache/bset.h3
-rw-r--r--drivers/md/bcache/btree.c56
-rw-r--r--drivers/md/bcache/btree.h2
-rw-r--r--drivers/md/bcache/closure.c17
-rw-r--r--drivers/md/bcache/debug.c5
-rw-r--r--drivers/md/bcache/journal.c78
-rw-r--r--drivers/md/bcache/request.c29
-rw-r--r--drivers/md/bcache/stats.c10
-rw-r--r--drivers/md/bcache/super.c230
-rw-r--r--drivers/md/bcache/sysfs.c30
-rw-r--r--drivers/md/bcache/writeback.c4
15 files changed, 339 insertions, 181 deletions
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index d26b35195825..fd714628da6a 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -5,5 +5,3 @@ obj-$(CONFIG_BCACHE) += bcache.o
bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\
io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
util.o writeback.o
-
-CFLAGS_request.o += -Iblock
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 6f776823b9ba..8bc1faf71ff2 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -67,6 +67,7 @@
#include <linux/blkdev.h>
#include <linux/kthread.h>
#include <linux/random.h>
+#include <linux/sched/signal.h>
#include <trace/events/bcache.h>
#define MAX_OPEN_BUCKETS 128
@@ -377,7 +378,10 @@ retry_invalidate:
if (!fifo_full(&ca->free_inc))
goto retry_invalidate;
- bch_prio_write(ca);
+ if (bch_prio_write(ca, false) < 0) {
+ ca->invalidate_needs_gc = 1;
+ wake_up_gc(ca->set);
+ }
}
}
out:
@@ -730,8 +734,21 @@ int bch_open_buckets_alloc(struct cache_set *c)
int bch_cache_allocator_start(struct cache *ca)
{
- struct task_struct *k = kthread_run(bch_allocator_thread,
- ca, "bcache_allocator");
+ struct task_struct *k;
+
+ /*
+ * In case previous btree check operation occupies too many
+ * system memory for bcache btree node cache, and the
+ * registering process is selected by OOM killer. Here just
+ * ignore the SIGKILL sent by OOM killer if there is, to
+ * avoid kthread_run() being failed by pending signals. The
+ * bcache registering process will exit after the registration
+ * done.
+ */
+ if (signal_pending(current))
+ flush_signals(current);
+
+ k = kthread_run(bch_allocator_thread, ca, "bcache_allocator");
if (IS_ERR(k))
return PTR_ERR(k);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 013e35a9e317..74a9849ea164 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -301,6 +301,7 @@ struct cached_dev {
struct block_device *bdev;
struct cache_sb sb;
+ struct cache_sb_disk *sb_disk;
struct bio sb_bio;
struct bio_vec sb_bv[1];
struct closure sb_write;
@@ -329,6 +330,9 @@ struct cached_dev {
*/
atomic_t has_dirty;
+#define BCH_CACHE_READA_ALL 0
+#define BCH_CACHE_READA_META_ONLY 1
+ unsigned int cache_readahead_policy;
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
@@ -403,6 +407,7 @@ enum alloc_reserve {
struct cache {
struct cache_set *set;
struct cache_sb sb;
+ struct cache_sb_disk *sb_disk;
struct bio sb_bio;
struct bio_vec sb_bv[1];
@@ -582,6 +587,7 @@ struct cache_set {
*/
wait_queue_head_t btree_cache_wait;
struct task_struct *btree_cache_alloc_lock;
+ spinlock_t btree_cannibalize_lock;
/*
* When we free a btree node, we increment the gen of the bucket the
@@ -723,6 +729,7 @@ struct cache_set {
unsigned int gc_always_rewrite:1;
unsigned int shrinker_disabled:1;
unsigned int copy_gc_enabled:1;
+ unsigned int idle_max_writeback_rate_enabled:1;
#define BUCKET_HASH_BITS 12
struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
@@ -977,7 +984,7 @@ bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
-void bch_prio_write(struct cache *ca);
+int bch_prio_write(struct cache *ca, bool wait);
void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
extern struct workqueue_struct *bcache_wq;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 08768796b543..4385303836d8 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -155,6 +155,7 @@ int __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
return 0;
}
+/* Pop the top key of keylist by pointing l->top to its previous key */
struct bkey *bch_keylist_pop(struct keylist *l)
{
struct bkey *k = l->keys;
@@ -168,6 +169,7 @@ struct bkey *bch_keylist_pop(struct keylist *l)
return l->top = k;
}
+/* Pop the bottom key of keylist and update l->top_p */
void bch_keylist_pop_front(struct keylist *l)
{
l->top_p -= bkey_u64s(l->keys);
@@ -309,7 +311,6 @@ void bch_btree_keys_free(struct btree_keys *b)
t->tree = NULL;
t->data = NULL;
}
-EXPORT_SYMBOL(bch_btree_keys_free);
int bch_btree_keys_alloc(struct btree_keys *b,
unsigned int page_order,
@@ -342,7 +343,6 @@ err:
bch_btree_keys_free(b);
return -ENOMEM;
}
-EXPORT_SYMBOL(bch_btree_keys_alloc);
void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
bool *expensive_debug_checks)
@@ -361,7 +361,6 @@ void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
* any more.
*/
}
-EXPORT_SYMBOL(bch_btree_keys_init);
/* Binary tree stuff for auxiliary search trees */
@@ -678,7 +677,6 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
bch_bset_build_unwritten_tree(b);
}
-EXPORT_SYMBOL(bch_bset_init_next);
/*
* Build auxiliary binary tree 'struct bset_tree *t', this tree is used to
@@ -732,7 +730,6 @@ void bch_bset_build_written_tree(struct btree_keys *b)
j = inorder_next(j, t->size))
make_bfloat(t, j);
}
-EXPORT_SYMBOL(bch_bset_build_written_tree);
/* Insert */
@@ -780,7 +777,6 @@ fix_right: do {
j = j * 2 + 1;
} while (j < t->size);
}
-EXPORT_SYMBOL(bch_bset_fix_invalidated_key);
static void bch_bset_fix_lookup_table(struct btree_keys *b,
struct bset_tree *t,
@@ -855,7 +851,6 @@ bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r)
return b->ops->key_merge(b, l, r);
}
-EXPORT_SYMBOL(bch_bkey_try_merge);
void bch_bset_insert(struct btree_keys *b, struct bkey *where,
struct bkey *insert)
@@ -875,7 +870,6 @@ void bch_bset_insert(struct btree_keys *b, struct bkey *where,
bkey_copy(where, insert);
bch_bset_fix_lookup_table(b, t, where);
}
-EXPORT_SYMBOL(bch_bset_insert);
unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
struct bkey *replace_key)
@@ -931,7 +925,6 @@ copy: bkey_copy(m, k);
merged:
return status;
}
-EXPORT_SYMBOL(bch_btree_insert_key);
/* Lookup */
@@ -1077,7 +1070,6 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
return i.l;
}
-EXPORT_SYMBOL(__bch_bset_search);
/* Btree iterator */
@@ -1132,7 +1124,6 @@ struct bkey *bch_btree_iter_init(struct btree_keys *b,
{
return __bch_btree_iter_init(b, iter, search, b->set);
}
-EXPORT_SYMBOL(bch_btree_iter_init);
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
btree_iter_cmp_fn *cmp)
@@ -1165,7 +1156,6 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter)
return __bch_btree_iter_next(iter, btree_iter_cmp);
}
-EXPORT_SYMBOL(bch_btree_iter_next);
struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
struct btree_keys *b, ptr_filter_fn fn)
@@ -1196,7 +1186,6 @@ int bch_bset_sort_state_init(struct bset_sort_state *state,
return mempool_init_page_pool(&state->pool, 1, page_order);
}
-EXPORT_SYMBOL(bch_bset_sort_state_init);
static void btree_mergesort(struct btree_keys *b, struct bset *out,
struct btree_iter *iter,
@@ -1268,6 +1257,11 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
* Our temporary buffer is the same size as the btree node's
* buffer, we can just swap buffers instead of doing a big
* memcpy()
+ *
+ * Don't worry event 'out' is allocated from mempool, it can
+ * still be swapped here. Because state->pool is a page mempool
+ * creaated by by mempool_init_page_pool(), which allocates
+ * pages by alloc_pages() indeed.
*/
out->magic = b->set->data->magic;
@@ -1313,7 +1307,6 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
}
-EXPORT_SYMBOL(bch_btree_sort_partial);
void bch_btree_sort_and_fix_extents(struct btree_keys *b,
struct btree_iter *iter,
@@ -1366,7 +1359,6 @@ void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
out:
bch_bset_build_written_tree(b);
}
-EXPORT_SYMBOL(bch_btree_sort_lazy);
void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
{
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index c71365e7c1fa..a50dcfda656f 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -397,7 +397,8 @@ void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *state);
/* Bkey utility code */
-#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
+#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \
+ (unsigned int)(i)->keys)
static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx)
{
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index ba434d9ac720..b12186c87f52 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -34,6 +34,7 @@
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/sched/clock.h>
+#include <linux/sched/signal.h>
#include <linux/rculist.h>
#include <linux/delay.h>
#include <trace/events/bcache.h>
@@ -543,6 +544,11 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
set_btree_node_dirty(b);
+ /*
+ * w->journal is always the oldest journal pin of all bkeys
+ * in the leaf node, to make sure the oldest jset seq won't
+ * be increased before this btree node is flushed.
+ */
if (journal_ref) {
if (w->journal &&
journal_pin_cmp(b->c, w->journal, journal_ref)) {
@@ -723,38 +729,38 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
* IO can always make forward progress:
*/
nr /= c->btree_pages;
+ if (nr == 0)
+ nr = 1;
nr = min_t(unsigned long, nr, mca_can_free(c));
i = 0;
btree_cache_used = c->btree_cache_used;
- list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
+ list_for_each_entry_safe_reverse(b, t, &c->btree_cache_freeable, list) {
if (nr <= 0)
goto out;
- if (++i > 3 &&
- !mca_reap(b, 0, false)) {
+ if (!mca_reap(b, 0, false)) {
mca_data_free(b);
rw_unlock(true, b);
freed++;
}
nr--;
+ i++;
}
- for (; (nr--) && i < btree_cache_used; i++) {
- if (list_empty(&c->btree_cache))
+ list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
+ if (nr <= 0 || i >= btree_cache_used)
goto out;
- b = list_first_entry(&c->btree_cache, struct btree, list);
- list_rotate_left(&c->btree_cache);
-
- if (!b->accessed &&
- !mca_reap(b, 0, false)) {
+ if (!mca_reap(b, 0, false)) {
mca_bucket_free(b);
mca_data_free(b);
rw_unlock(true, b);
freed++;
- } else
- b->accessed = 0;
+ }
+
+ nr--;
+ i++;
}
out:
mutex_unlock(&c->bucket_lock);
@@ -884,15 +890,17 @@ out:
static int mca_cannibalize_lock(struct cache_set *c, struct btree_op *op)
{
- struct task_struct *old;
-
- old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
- if (old && old != current) {
+ spin_lock(&c->btree_cannibalize_lock);
+ if (likely(c->btree_cache_alloc_lock == NULL)) {
+ c->btree_cache_alloc_lock = current;
+ } else if (c->btree_cache_alloc_lock != current) {
if (op)
prepare_to_wait(&c->btree_cache_wait, &op->wait,
TASK_UNINTERRUPTIBLE);
+ spin_unlock(&c->btree_cannibalize_lock);
return -EINTR;
}
+ spin_unlock(&c->btree_cannibalize_lock);
return 0;
}
@@ -927,10 +935,12 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
*/
static void bch_cannibalize_unlock(struct cache_set *c)
{
+ spin_lock(&c->btree_cannibalize_lock);
if (c->btree_cache_alloc_lock == current) {
c->btree_cache_alloc_lock = NULL;
wake_up(&c->btree_cache_wait);
}
+ spin_unlock(&c->btree_cannibalize_lock);
}
static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
@@ -1058,7 +1068,6 @@ retry:
BUG_ON(!b->written);
b->parent = parent;
- b->accessed = 1;
for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
prefetch(b->keys.set[i].tree);
@@ -1149,7 +1158,6 @@ retry:
goto retry;
}
- b->accessed = 1;
b->parent = parent;
bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
@@ -1906,6 +1914,18 @@ static int bch_gc_thread(void *arg)
int bch_gc_thread_start(struct cache_set *c)
{
+ /*
+ * In case previous btree check operation occupies too many
+ * system memory for bcache btree node cache, and the
+ * registering process is selected by OOM killer. Here just
+ * ignore the SIGKILL sent by OOM killer if there is, to
+ * avoid kthread_run() being failed by pending signals. The
+ * bcache registering process will exit after the registration
+ * done.
+ */
+ if (signal_pending(current))
+ flush_signals(current);
+
c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc");
return PTR_ERR_OR_ZERO(c->gc_thread);
}
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 76cfd121a486..f4dcca449391 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -121,8 +121,6 @@ struct btree {
/* Key/pointer for this btree node */
BKEY_PADDED(key);
- /* Single bit - set when accessed, cleared by shrinker */
- unsigned long accessed;
unsigned long seq;
struct rw_semaphore lock;
struct cache_set *c;
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 73f5319295bc..0164a1fe94a9 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -45,7 +45,6 @@ void closure_sub(struct closure *cl, int v)
{
closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
}
-EXPORT_SYMBOL(closure_sub);
/*
* closure_put - decrement a closure's refcount
@@ -54,7 +53,6 @@ void closure_put(struct closure *cl)
{
closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
}
-EXPORT_SYMBOL(closure_put);
/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
@@ -76,7 +74,6 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
closure_sub(cl, CLOSURE_WAITING + 1);
}
}
-EXPORT_SYMBOL(__closure_wake_up);
/**
* closure_wait - add a closure to a waitlist
@@ -96,7 +93,6 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
return true;
}
-EXPORT_SYMBOL(closure_wait);
struct closure_syncer {
struct task_struct *task;
@@ -105,8 +101,14 @@ struct closure_syncer {
static void closure_sync_fn(struct closure *cl)
{
- cl->s->done = 1;
- wake_up_process(cl->s->task);
+ struct closure_syncer *s = cl->s;
+ struct task_struct *p;
+
+ rcu_read_lock();
+ p = READ_ONCE(s->task);
+ s->done = 1;
+ wake_up_process(p);
+ rcu_read_unlock();
}
void __sched __closure_sync(struct closure *cl)
@@ -125,7 +127,6 @@ void __sched __closure_sync(struct closure *cl)
__set_current_state(TASK_RUNNING);
}
-EXPORT_SYMBOL(__closure_sync);
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
@@ -143,7 +144,6 @@ void closure_debug_create(struct closure *cl)
list_add(&cl->all, &closure_list);
spin_unlock_irqrestore(&closure_list_lock, flags);
}
-EXPORT_SYMBOL(closure_debug_create);
void closure_debug_destroy(struct closure *cl)
{
@@ -156,7 +156,6 @@ void closure_debug_destroy(struct closure *cl)
list_del(&cl->all);
spin_unlock_irqrestore(&closure_list_lock, flags);
}
-EXPORT_SYMBOL(closure_debug_destroy);
static struct dentry *closure_debug;
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 8b123be05254..336f43910383 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -178,10 +178,9 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
while (size) {
struct keybuf_key *w;
unsigned int bytes = min(i->bytes, size);
- int err = copy_to_user(buf, i->buf, bytes);
- if (err)
- return err;
+ if (copy_to_user(buf, i->buf, bytes))
+ return -EFAULT;
ret += bytes;
buf += bytes;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index be2a2a201603..0e3ff9745ac7 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -420,7 +420,10 @@ err:
static void btree_flush_write(struct cache_set *c)
{
struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
- unsigned int i, n;
+ unsigned int i, nr;
+ int ref_nr;
+ atomic_t *fifo_front_p, *now_fifo_front_p;
+ size_t mask;
if (c->journal.btree_flushing)
return;
@@ -433,12 +436,50 @@ static void btree_flush_write(struct cache_set *c)
c->journal.btree_flushing = true;
spin_unlock(&c->journal.flush_write_lock);
+ /* get the oldest journal entry and check its refcount */
+ spin_lock(&c->journal.lock);
+ fifo_front_p = &fifo_front(&c->journal.pin);
+ ref_nr = atomic_read(fifo_front_p);
+ if (ref_nr <= 0) {
+ /*
+ * do nothing if no btree node references
+ * the oldest journal entry
+ */
+ spin_unlock(&c->journal.lock);
+ goto out;
+ }
+ spin_unlock(&c->journal.lock);
+
+ mask = c->journal.pin.mask;
+ nr = 0;
atomic_long_inc(&c->flush_write);
memset(btree_nodes, 0, sizeof(btree_nodes));
- n = 0;
mutex_lock(&c->bucket_lock);
list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
+ /*
+ * It is safe to get now_fifo_front_p without holding
+ * c->journal.lock here, because we don't need to know
+ * the exactly accurate value, just check whether the
+ * front pointer of c->journal.pin is changed.
+ */
+ now_fifo_front_p = &fifo_front(&c->journal.pin);
+ /*
+ * If the oldest journal entry is reclaimed and front
+ * pointer of c->journal.pin changes, it is unnecessary
+ * to scan c->btree_cache anymore, just quit the loop and
+ * flush out what we have already.
+ */
+ if (now_fifo_front_p != fifo_front_p)
+ break;
+ /*
+ * quit this loop if all matching btree nodes are
+ * scanned and record in btree_nodes[] already.
+ */
+ ref_nr = atomic_read(fifo_front_p);
+ if (nr >= ref_nr)
+ break;
+
if (btree_node_journal_flush(b))
pr_err("BUG: flush_write bit should not be set here!");
@@ -454,17 +495,43 @@ static void btree_flush_write(struct cache_set *c)
continue;
}
+ /*
+ * Only select the btree node which exactly references
+ * the oldest journal entry.
+ *
+ * If the journal entry pointed by fifo_front_p is
+ * reclaimed in parallel, don't worry:
+ * - the list_for_each_xxx loop will quit when checking
+ * next now_fifo_front_p.
+ * - If there are matched nodes recorded in btree_nodes[],
+ * they are clean now (this is why and how the oldest
+ * journal entry can be reclaimed). These selected nodes
+ * will be ignored and skipped in the folowing for-loop.
+ */
+ if (((btree_current_write(b)->journal - fifo_front_p) &
+ mask) != 0) {
+ mutex_unlock(&b->write_lock);
+ continue;
+ }
+
set_btree_node_journal_flush(b);
mutex_unlock(&b->write_lock);
- btree_nodes[n++] = b;
- if (n == BTREE_FLUSH_NR)
+ btree_nodes[nr++] = b;
+ /*
+ * To avoid holding c->bucket_lock too long time,
+ * only scan for BTREE_FLUSH_NR matched btree nodes
+ * at most. If there are more btree nodes reference
+ * the oldest journal entry, try to flush them next
+ * time when btree_flush_write() is called.
+ */
+ if (nr == BTREE_FLUSH_NR)
break;
}
mutex_unlock(&c->bucket_lock);
- for (i = 0; i < n; i++) {
+ for (i = 0; i < nr; i++) {
b = btree_nodes[i];
if (!b) {
pr_err("BUG: btree_nodes[%d] is NULL", i);
@@ -497,6 +564,7 @@ static void btree_flush_write(struct cache_set *c)
mutex_unlock(&b->write_lock);
}
+out:
spin_lock(&c->journal.flush_write_lock);
c->journal.btree_flushing = false;
spin_unlock(&c->journal.flush_write_lock);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 41adcd1546f1..820d8402a1dc 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -62,18 +62,6 @@ static void bch_data_insert_keys(struct closure *cl)
struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
int ret;
- /*
- * If we're looping, might already be waiting on
- * another journal write - can't wait on more than one journal write at
- * a time
- *
- * XXX: this looks wrong
- */
-#if 0
- while (atomic_read(&s->cl.remaining) & CLOSURE_WAITING)
- closure_sync(&s->cl);
-#endif
-
if (!op->replace)
journal_ref = bch_journal(op->c, &op->insert_keys,
op->flush_journal ? cl : NULL);
@@ -391,13 +379,20 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
goto skip;
/*
- * Flag for bypass if the IO is for read-ahead or background,
- * unless the read-ahead request is for metadata
+ * If the bio is for read-ahead or background IO, bypass it or
+ * not depends on the following situations,
+ * - If the IO is for meta data, always cache it and no bypass
+ * - If the IO is not meta data, check dc->cache_reada_policy,
+ * BCH_CACHE_READA_ALL: cache it and not bypass
+ * BCH_CACHE_READA_META_ONLY: not cache it and bypass
+ * That is, read-ahead request for metadata always get cached
* (eg, for gfs2 or xfs).
*/
- if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
- !(bio->bi_opf & (REQ_META|REQ_PRIO)))
- goto skip;
+ if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
+ if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
+ (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
+ goto skip;
+ }
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index ba1c93791d8d..503aafe188dc 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -109,9 +109,13 @@ int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
void bch_cache_accounting_clear(struct cache_accounting *acc)
{
- memset(&acc->total.cache_hits,
- 0,
- sizeof(struct cache_stats));
+ acc->total.cache_hits = 0;
+ acc->total.cache_misses = 0;
+ acc->total.cache_bypass_hits = 0;
+ acc->total.cache_bypass_misses = 0;
+ acc->total.cache_readaheads = 0;
+ acc->total.cache_miss_collisions = 0;
+ acc->total.sectors_bypassed = 0;
}
void bch_cache_accounting_destroy(struct cache_accounting *acc)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 20ed838e9413..0c3c5419c52b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -15,7 +15,6 @@
#include "writeback.h"
#include <linux/blkdev.h>
-#include <linux/buffer_head.h>
#include <linux/debugfs.h>
#include <linux/genhd.h>
#include <linux/idr.h>
@@ -60,17 +59,18 @@ struct workqueue_struct *bch_journal_wq;
/* Superblock */
static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
- struct page **res)
+ struct cache_sb_disk **res)
{
const char *err;
- struct cache_sb *s;
- struct buffer_head *bh = __bread(bdev, 1, SB_SIZE);
+ struct cache_sb_disk *s;
+ struct page *page;
unsigned int i;
- if (!bh)
+ page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
+ SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
+ if (IS_ERR(page))
return "IO error";
-
- s = (struct cache_sb *) bh->b_data;
+ s = page_address(page) + offset_in_page(SB_OFFSET);
sb->offset = le64_to_cpu(s->offset);
sb->version = le64_to_cpu(s->version);
@@ -92,10 +92,11 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
sb->version, sb->flags, sb->seq, sb->keys);
- err = "Not a bcache superblock";
+ err = "Not a bcache superblock (bad offset)";
if (sb->offset != SB_SECTOR)
goto err;
+ err = "Not a bcache superblock (bad magic)";
if (memcmp(sb->magic, bcache_magic, 16))
goto err;
@@ -187,12 +188,10 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
}
sb->last_mount = (u32)ktime_get_real_seconds();
- err = NULL;
-
- get_page(bh->b_page);
- *res = bh->b_page;
+ *res = s;
+ return NULL;
err:
- put_bh(bh);
+ put_page(page);
return err;
}
@@ -206,15 +205,15 @@ static void write_bdev_super_endio(struct bio *bio)
closure_put(&dc->sb_write);
}
-static void __write_super(struct cache_sb *sb, struct bio *bio)
+static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
+ struct bio *bio)
{
- struct cache_sb *out = page_address(bio_first_page_all(bio));
unsigned int i;
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
bio->bi_iter.bi_sector = SB_SECTOR;
- bio->bi_iter.bi_size = SB_SIZE;
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
- bch_bio_map(bio, NULL);
+ __bio_add_page(bio, virt_to_page(out), SB_SIZE,
+ offset_in_page(out));
out->offset = cpu_to_le64(sb->offset);
out->version = cpu_to_le64(sb->version);
@@ -256,14 +255,14 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
down(&dc->sb_write_mutex);
closure_init(cl, parent);
- bio_reset(bio);
+ bio_init(bio, dc->sb_bv, 1);
bio_set_dev(bio, dc->bdev);
bio->bi_end_io = write_bdev_super_endio;
bio->bi_private = dc;
closure_get(cl);
/* I/O request sent to backing device */
- __write_super(&dc->sb, bio);
+ __write_super(&dc->sb, dc->sb_disk, bio);
closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
}
@@ -305,13 +304,13 @@ void bcache_write_super(struct cache_set *c)
SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb));
- bio_reset(bio);
+ bio_init(bio, ca->sb_bv, 1);
bio_set_dev(bio, ca->bdev);
bio->bi_end_io = write_super_endio;
bio->bi_private = ca;
closure_get(cl);
- __write_super(&ca->sb, bio);
+ __write_super(&ca->sb, ca->sb_disk, bio);
}
closure_return_with_destructor(cl, bcache_write_super_unlock);
@@ -529,12 +528,29 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
closure_sync(cl);
}
-void bch_prio_write(struct cache *ca)
+int bch_prio_write(struct cache *ca, bool wait)
{
int i;
struct bucket *b;
struct closure cl;
+ pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu",
+ fifo_used(&ca->free[RESERVE_PRIO]),
+ fifo_used(&ca->free[RESERVE_NONE]),
+ fifo_used(&ca->free_inc));
+
+ /*
+ * Pre-check if there are enough free buckets. In the non-blocking
+ * scenario it's better to fail early rather than starting to allocate
+ * buckets and do a cleanup later in case of failure.
+ */
+ if (!wait) {
+ size_t avail = fifo_used(&ca->free[RESERVE_PRIO]) +
+ fifo_used(&ca->free[RESERVE_NONE]);
+ if (prio_buckets(ca) > avail)
+ return -ENOMEM;
+ }
+
closure_init_stack(&cl);
lockdep_assert_held(&ca->set->bucket_lock);
@@ -544,9 +560,6 @@ void bch_prio_write(struct cache *ca)
atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
&ca->meta_sectors_written);
- //pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
- // fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-
for (i = prio_buckets(ca) - 1; i >= 0; --i) {
long bucket;
struct prio_set *p = ca->disk_buckets;
@@ -564,7 +577,7 @@ void bch_prio_write(struct cache *ca)
p->magic = pset_magic(&ca->sb);
p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8);
- bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true);
+ bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait);
BUG_ON(bucket == -1);
mutex_unlock(&ca->set->bucket_lock);
@@ -593,14 +606,16 @@ void bch_prio_write(struct cache *ca)
ca->prio_last_buckets[i] = ca->prio_buckets[i];
}
+ return 0;
}
-static void prio_read(struct cache *ca, uint64_t bucket)
+static int prio_read(struct cache *ca, uint64_t bucket)
{
struct prio_set *p = ca->disk_buckets;
struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
struct bucket *b;
unsigned int bucket_nr = 0;
+ int ret = -EIO;
for (b = ca->buckets;
b < ca->buckets + ca->sb.nbuckets;
@@ -613,11 +628,15 @@ static void prio_read(struct cache *ca, uint64_t bucket)
prio_io(ca, bucket, REQ_OP_READ, 0);
if (p->csum !=
- bch_crc64(&p->magic, bucket_bytes(ca) - 8))
+ bch_crc64(&p->magic, bucket_bytes(ca) - 8)) {
pr_warn("bad csum reading priorities");
+ goto out;
+ }
- if (p->magic != pset_magic(&ca->sb))
+ if (p->magic != pset_magic(&ca->sb)) {
pr_warn("bad magic reading priorities");
+ goto out;
+ }
bucket = p->next_bucket;
d = p->data;
@@ -626,6 +645,10 @@ static void prio_read(struct cache *ca, uint64_t bucket)
b->prio = le16_to_cpu(d->prio);
b->gen = b->last_gc = d->gen;
}
+
+ ret = 0;
+out:
+ return ret;
}
/* Bcache device */
@@ -761,20 +784,28 @@ static inline int idx_to_first_minor(int idx)
static void bcache_device_free(struct bcache_device *d)
{
+ struct gendisk *disk = d->disk;
+
lockdep_assert_held(&bch_register_lock);
- pr_info("%s stopped", d->disk->disk_name);
+ if (disk)
+ pr_info("%s stopped", disk->disk_name);
+ else
+ pr_err("bcache device (NULL gendisk) stopped");
if (d->c)
bcache_device_detach(d);
- if (d->disk && d->disk->flags & GENHD_FL_UP)
- del_gendisk(d->disk);
- if (d->disk && d->disk->queue)
- blk_cleanup_queue(d->disk->queue);
- if (d->disk) {
+
+ if (disk) {
+ if (disk->flags & GENHD_FL_UP)
+ del_gendisk(disk);
+
+ if (disk->queue)
+ blk_cleanup_queue(disk->queue);
+
ida_simple_remove(&bcache_device_idx,
- first_minor_to_idx(d->disk->first_minor));
- put_disk(d->disk);
+ first_minor_to_idx(disk->first_minor));
+ put_disk(disk);
}
bioset_exit(&d->bio_split);
@@ -1251,6 +1282,9 @@ static void cached_dev_free(struct closure *cl)
mutex_unlock(&bch_register_lock);
+ if (dc->sb_disk)
+ put_page(virt_to_page(dc->sb_disk));
+
if (!IS_ERR_OR_NULL(dc->bdev))
blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
@@ -1326,7 +1360,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
/* Cached device - bcache superblock */
-static int register_bdev(struct cache_sb *sb, struct page *sb_page,
+static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
struct block_device *bdev,
struct cached_dev *dc)
{
@@ -1338,11 +1372,7 @@ static int register_bdev(struct cache_sb *sb, struct page *sb_page,
memcpy(&dc->sb, sb, sizeof(struct cache_sb));
dc->bdev = bdev;
dc->bdev->bd_holder = dc;
-
- bio_init(&dc->sb_bio, dc->sb_bio.bi_inline_vecs, 1);
- bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page;
- get_page(sb_page);
-
+ dc->sb_disk = sb_disk;
if (cached_dev_init(dc, sb->block_size << 9))
goto err;
@@ -1769,6 +1799,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
sema_init(&c->sb_write_mutex, 1);
mutex_init(&c->bucket_lock);
init_waitqueue_head(&c->btree_cache_wait);
+ spin_lock_init(&c->btree_cannibalize_lock);
init_waitqueue_head(&c->bucket_wait);
init_waitqueue_head(&c->gc_wait);
sema_init(&c->uuid_write_mutex, 1);
@@ -1809,6 +1840,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = DEFAULT_IO_ERROR_LIMIT;
+ c->idle_max_writeback_rate_enabled = 1;
WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
return c;
@@ -1850,8 +1882,10 @@ static int run_cache_set(struct cache_set *c)
j = &list_entry(journal.prev, struct journal_replay, list)->j;
err = "IO error reading priorities";
- for_each_cache(ca, c, i)
- prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]);
+ for_each_cache(ca, c, i) {
+ if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]))
+ goto err;
+ }
/*
* If prio_read() fails it'll call cache_set_error and we'll
@@ -1883,23 +1917,6 @@ static int run_cache_set(struct cache_set *c)
if (bch_btree_check(c))
goto err;
- /*
- * bch_btree_check() may occupy too much system memory which
- * has negative effects to user space application (e.g. data
- * base) performance. Shrink the mca cache memory proactively
- * here to avoid competing memory with user space workloads..
- */
- if (!c->shrinker_disabled) {
- struct shrink_control sc;
-
- sc.gfp_mask = GFP_KERNEL;
- sc.nr_to_scan = c->btree_cache_used * c->btree_pages;
- /* first run to clear b->accessed tag */
- c->shrink.scan_objects(&c->shrink, &sc);
- /* second run to reap non-accessed nodes */
- c->shrink.scan_objects(&c->shrink, &sc);
- }
-
bch_journal_mark(c, &journal);
bch_initial_gc_finish(c);
pr_debug("btree_check() done");
@@ -1954,7 +1971,7 @@ static int run_cache_set(struct cache_set *c)
mutex_lock(&c->bucket_lock);
for_each_cache(ca, c, i)
- bch_prio_write(ca);
+ bch_prio_write(ca, true);
mutex_unlock(&c->bucket_lock);
err = "cannot allocate new UUID bucket";
@@ -2110,8 +2127,8 @@ void bch_cache_release(struct kobject *kobj)
for (i = 0; i < RESERVE_NR; i++)
free_fifo(&ca->free[i]);
- if (ca->sb_bio.bi_inline_vecs[0].bv_page)
- put_page(bio_first_page_all(&ca->sb_bio));
+ if (ca->sb_disk)
+ put_page(virt_to_page(ca->sb_disk));
if (!IS_ERR_OR_NULL(ca->bdev))
blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
@@ -2233,7 +2250,7 @@ err_free:
return ret;
}
-static int register_cache(struct cache_sb *sb, struct page *sb_page,
+static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
struct block_device *bdev, struct cache *ca)
{
const char *err = NULL; /* must be set for any error case */
@@ -2243,10 +2260,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev;
ca->bdev->bd_holder = ca;
-
- bio_init(&ca->sb_bio, ca->sb_bio.bi_inline_vecs, 1);
- bio_first_bvec_all(&ca->sb_bio)->bv_page = sb_page;
- get_page(sb_page);
+ ca->sb_disk = sb_disk;
if (blk_queue_discard(bdev_get_queue(bdev)))
ca->discard = CACHE_DISCARD(&ca->sb);
@@ -2346,29 +2360,35 @@ static bool bch_is_open(struct block_device *bdev)
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
const char *buffer, size_t size)
{
- ssize_t ret = -EINVAL;
- const char *err = "cannot allocate memory";
+ const char *err;
char *path = NULL;
- struct cache_sb *sb = NULL;
- struct block_device *bdev = NULL;
- struct page *sb_page = NULL;
+ struct cache_sb *sb;
+ struct cache_sb_disk *sb_disk;
+ struct block_device *bdev;
+ ssize_t ret;
+ ret = -EBUSY;
+ err = "failed to reference bcache module";
if (!try_module_get(THIS_MODULE))
- return -EBUSY;
+ goto out;
/* For latest state of bcache_is_reboot */
smp_mb();
+ err = "bcache is in reboot";
if (bcache_is_reboot)
- return -EBUSY;
+ goto out_module_put;
+ ret = -ENOMEM;
+ err = "cannot allocate memory";
path = kstrndup(buffer, size, GFP_KERNEL);
if (!path)
- goto err;
+ goto out_module_put;
sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL);
if (!sb)
- goto err;
+ goto out_free_path;
+ ret = -EINVAL;
err = "failed to open device";
bdev = blkdev_get_by_path(strim(path),
FMODE_READ|FMODE_WRITE|FMODE_EXCL,
@@ -2385,57 +2405,63 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!IS_ERR(bdev))
bdput(bdev);
if (attr == &ksysfs_register_quiet)
- goto quiet_out;
+ goto done;
}
- goto err;
+ goto out_free_sb;
}
err = "failed to set blocksize";
if (set_blocksize(bdev, 4096))
- goto err_close;
+ goto out_blkdev_put;
- err = read_super(sb, bdev, &sb_page);
+ err = read_super(sb, bdev, &sb_disk);
if (err)
- goto err_close;
+ goto out_blkdev_put;
err = "failed to register device";
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
if (!dc)
- goto err_close;
+ goto out_put_sb_page;
mutex_lock(&bch_register_lock);
- ret = register_bdev(sb, sb_page, bdev, dc);
+ ret = register_bdev(sb, sb_disk, bdev, dc);
mutex_unlock(&bch_register_lock);
/* blkdev_put() will be called in cached_dev_free() */
if (ret < 0)
- goto err;
+ goto out_free_sb;
} else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
- goto err_close;
+ goto out_put_sb_page;
/* blkdev_put() will be called in bch_cache_release() */
- if (register_cache(sb, sb_page, bdev, ca) != 0)
- goto err;
+ if (register_cache(sb, sb_disk, bdev, ca) != 0)
+ goto out_free_sb;
}
-quiet_out:
- ret = size;
-out:
- if (sb_page)
- put_page(sb_page);
+
+done:
kfree(sb);
kfree(path);
module_put(THIS_MODULE);
- return ret;
+ return size;
-err_close:
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-err:
- pr_info("error %s: %s", path, err);
- goto out;
+out_put_sb_page:
+ put_page(virt_to_page(sb_disk));
+out_blkdev_put:
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+out_free_sb:
+ kfree(sb);
+out_free_path:
+ kfree(path);
+ path = NULL;
+out_module_put:
+ module_put(THIS_MODULE);
+out:
+ pr_info("error %s: %s", path?path:"", err);
+ return ret;
}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index e2059af90791..3470fae4eabc 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -27,6 +27,12 @@ static const char * const bch_cache_modes[] = {
NULL
};
+static const char * const bch_reada_cache_policies[] = {
+ "all",
+ "meta-only",
+ NULL
+};
+
/* Default is 0 ("auto") */
static const char * const bch_stop_on_failure_modes[] = {
"auto",
@@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
+rw_attribute(readahead_cache_policy);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
@@ -134,6 +141,7 @@ rw_attribute(expensive_debug_checks);
rw_attribute(cache_replacement_policy);
rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
+rw_attribute(idle_max_writeback_rate);
rw_attribute(gc_after_writeback);
rw_attribute(size);
@@ -167,6 +175,11 @@ SHOW(__bch_cached_dev)
bch_cache_modes,
BDEV_CACHE_MODE(&dc->sb));
+ if (attr == &sysfs_readahead_cache_policy)
+ return bch_snprint_string_list(buf, PAGE_SIZE,
+ bch_reada_cache_policies,
+ dc->cache_readahead_policy);
+
if (attr == &sysfs_stop_when_cache_set_failed)
return bch_snprint_string_list(buf, PAGE_SIZE,
bch_stop_on_failure_modes,
@@ -352,6 +365,15 @@ STORE(__cached_dev)
}
}
+ if (attr == &sysfs_readahead_cache_policy) {
+ v = __sysfs_match_string(bch_reada_cache_policies, -1, buf);
+ if (v < 0)
+ return v;
+
+ if ((unsigned int) v != dc->cache_readahead_policy)
+ dc->cache_readahead_policy = v;
+ }
+
if (attr == &sysfs_stop_when_cache_set_failed) {
v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
if (v < 0)
@@ -466,6 +488,7 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_data_csum,
#endif
&sysfs_cache_mode,
+ &sysfs_readahead_cache_policy,
&sysfs_stop_when_cache_set_failed,
&sysfs_writeback_metadata,
&sysfs_writeback_running,
@@ -747,6 +770,8 @@ SHOW(__bch_cache_set)
sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
+ sysfs_printf(idle_max_writeback_rate, "%i",
+ c->idle_max_writeback_rate_enabled);
sysfs_printf(gc_after_writeback, "%i", c->gc_after_writeback);
sysfs_printf(io_disable, "%i",
test_bit(CACHE_SET_IO_DISABLE, &c->flags));
@@ -864,6 +889,9 @@ STORE(__bch_cache_set)
sysfs_strtoul_bool(gc_always_rewrite, c->gc_always_rewrite);
sysfs_strtoul_bool(btree_shrinker_disabled, c->shrinker_disabled);
sysfs_strtoul_bool(copy_gc_enabled, c->copy_gc_enabled);
+ sysfs_strtoul_bool(idle_max_writeback_rate,
+ c->idle_max_writeback_rate_enabled);
+
/*
* write gc_after_writeback here may overwrite an already set
* BCH_DO_AUTO_GC, it doesn't matter because this flag will be
@@ -954,6 +982,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_gc_always_rewrite,
&sysfs_btree_shrinker_disabled,
&sysfs_copy_gc_enabled,
+ &sysfs_idle_max_writeback_rate,
&sysfs_gc_after_writeback,
&sysfs_io_disable,
&sysfs_cutoff_writeback,
@@ -964,6 +993,7 @@ KTYPE(bch_cache_set_internal);
static int __bch_cache_cmp(const void *l, const void *r)
{
+ cond_resched();
return *((uint16_t *)r) - *((uint16_t *)l);
}
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index d60268fe49e1..4a40f9eadeaf 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -122,6 +122,10 @@ static void __update_writeback_rate(struct cached_dev *dc)
static bool set_at_max_writeback_rate(struct cache_set *c,
struct cached_dev *dc)
{
+ /* Don't sst max writeback rate if it is disabled */
+ if (!c->idle_max_writeback_rate_enabled)
+ return false;
+
/* Don't set max writeback rate if gc is running */
if (!c->gc_mark_valid)
return false;
OpenPOWER on IntegriCloud