bcache: Kill unused freelist

This was originally added as an optimization that for various reasons isn't
needed anymore, but it does add a lot of nasty corner cases (and it was
responsible for some recently fixed bugs). Just get rid of it now.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>

Parent: 0a63b66db5
Commit: 2531d9ee61
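For orientation, here is a minimal sketch of the per-cache bucket pipelines this commit touches, condensed from the bcache.h hunks below (toy stand-in types, not the kernel definitions):

#include <stddef.h>

/* Toy FIFO standing in for bcache's DECLARE_FIFO(). */
struct fifo { long *data; size_t front, back, size; };

#define RESERVE_NR 4	/* btree/prio/movinggc/none reserves */

struct cache_before {			/* before this commit */
	struct fifo free[RESERVE_NR];	/* ready to be handed out */
	struct fifo free_inc;		/* invalidated; waiting for the
					 * prios/gens to be rewritten */
	struct fifo unused;		/* GC saw no pointers into these, so
					 * they skipped the prios/gens write:
					 * the optimization being removed */
	unsigned char need_save_prio;	/* forced prio writes so the on-disk
					 * gens (disk_gen) never lagged far */
};

struct cache_after {			/* after this commit */
	struct fifo free[RESERVE_NR];
	struct fifo free_inc;		/* now the only path onto free[] */
};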
drivers/md/bcache/alloc.c

@@ -78,12 +78,6 @@ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
 	ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
 	WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);
 
-	if (CACHE_SYNC(&ca->set->sb)) {
-		ca->need_save_prio = max(ca->need_save_prio,
-					 bucket_disk_gen(b));
-		WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX);
-	}
-
 	return ret;
 }
 
@@ -120,58 +114,46 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
 	mutex_unlock(&c->bucket_lock);
 }
 
-/* Allocation */
+/*
+ * Background allocation thread: scans for buckets to be invalidated,
+ * invalidates them, rewrites prios/gens (marking them as invalidated on disk),
+ * then optionally issues discard commands to the newly free buckets, then puts
+ * them on the various freelists.
+ */
 
 static inline bool can_inc_bucket_gen(struct bucket *b)
 {
-	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX &&
-		bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX;
+	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
 }
 
-bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
+bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
 {
-	BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
+	BUG_ON(!ca->set->gc_mark_valid);
 
-	if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
-		unsigned i;
-
-		for (i = 0; i < RESERVE_NONE; i++)
-			if (!fifo_full(&ca->free[i]))
-				goto add;
-
-		return false;
-	}
-add:
-	b->prio = 0;
-
-	if (can_inc_bucket_gen(b) &&
-	    fifo_push(&ca->unused, b - ca->buckets)) {
-		atomic_inc(&b->pin);
-		return true;
-	}
-
-	return false;
-}
-
-static bool can_invalidate_bucket(struct cache *ca, struct bucket *b)
-{
 	return (!GC_MARK(b) ||
 		GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
 		!atomic_read(&b->pin) &&
 		can_inc_bucket_gen(b);
 }
 
-static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
+void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
 {
-	size_t bucket = b - ca->buckets;
+	lockdep_assert_held(&ca->set->bucket_lock);
+	BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);
 
 	if (GC_SECTORS_USED(b))
-		trace_bcache_invalidate(ca, bucket);
+		trace_bcache_invalidate(ca, b - ca->buckets);
 
 	bch_inc_gen(ca, b);
 	b->prio = INITIAL_PRIO;
 	atomic_inc(&b->pin);
-	fifo_push(&ca->free_inc, bucket);
+}
+
+static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
+{
+	__bch_invalidate_one_bucket(ca, b);
+
+	fifo_push(&ca->free_inc, b - ca->buckets);
 }
 
 /*
@@ -201,20 +183,7 @@ static void invalidate_buckets_lru(struct cache *ca)
 	ca->heap.used = 0;
 
 	for_each_bucket(b, ca) {
-		/*
-		 * If we fill up the unused list, if we then return before
-		 * adding anything to the free_inc list we'll skip writing
-		 * prios/gens and just go back to allocating from the unused
-		 * list:
-		 */
-		if (fifo_full(&ca->unused))
-			return;
-
-		if (!can_invalidate_bucket(ca, b))
-			continue;
-
-		if (!GC_SECTORS_USED(b) &&
-		    bch_bucket_add_unused(ca, b))
+		if (!bch_can_invalidate_bucket(ca, b))
 			continue;
 
 		if (!heap_full(&ca->heap))
@@ -239,7 +208,7 @@ static void invalidate_buckets_lru(struct cache *ca)
 			return;
 		}
 
-		invalidate_one_bucket(ca, b);
+		bch_invalidate_one_bucket(ca, b);
 	}
 }
 
@@ -255,8 +224,8 @@ static void invalidate_buckets_fifo(struct cache *ca)
 
 		b = ca->buckets + ca->fifo_last_bucket++;
 
-		if (can_invalidate_bucket(ca, b))
-			invalidate_one_bucket(ca, b);
+		if (bch_can_invalidate_bucket(ca, b))
+			bch_invalidate_one_bucket(ca, b);
 
 		if (++checked >= ca->sb.nbuckets) {
 			ca->invalidate_needs_gc = 1;
@@ -280,8 +249,8 @@ static void invalidate_buckets_random(struct cache *ca)
 
 		b = ca->buckets + n;
 
-		if (can_invalidate_bucket(ca, b))
-			invalidate_one_bucket(ca, b);
+		if (bch_can_invalidate_bucket(ca, b))
+			bch_invalidate_one_bucket(ca, b);
 
 		if (++checked >= ca->sb.nbuckets / 2) {
 			ca->invalidate_needs_gc = 1;
@@ -293,8 +262,7 @@ static void invalidate_buckets_random(struct cache *ca)
 
 static void invalidate_buckets(struct cache *ca)
 {
-	if (ca->invalidate_needs_gc)
-		return;
+	BUG_ON(ca->invalidate_needs_gc);
 
 	switch (CACHE_REPLACEMENT(&ca->sb)) {
 	case CACHE_REPLACEMENT_LRU:
@@ -354,17 +322,10 @@ static int bch_allocator_thread(void *arg)
 		 * possibly issue discards to them, then we add the bucket to
 		 * the free list:
 		 */
-		while (1) {
+		while (!fifo_empty(&ca->free_inc)) {
 			long bucket;
 
-			if ((!atomic_read(&ca->set->prio_blocked) ||
-			     !CACHE_SYNC(&ca->set->sb)) &&
-			    !fifo_empty(&ca->unused))
-				fifo_pop(&ca->unused, bucket);
-			else if (!fifo_empty(&ca->free_inc))
-				fifo_pop(&ca->free_inc, bucket);
-			else
-				break;
+			fifo_pop(&ca->free_inc, bucket);
 
 			if (ca->discard) {
 				mutex_unlock(&ca->set->bucket_lock);
@@ -385,9 +346,9 @@ static int bch_allocator_thread(void *arg)
 		 * them to the free_inc list:
 		 */
 
+retry_invalidate:
 		allocator_wait(ca, ca->set->gc_mark_valid &&
-			       (ca->need_save_prio > 64 ||
-				!ca->invalidate_needs_gc));
+			       !ca->invalidate_needs_gc);
 		invalidate_buckets(ca);
 
 		/*
@@ -395,13 +356,28 @@ static int bch_allocator_thread(void *arg)
 		 * new stuff to them:
 		 */
 		allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
-		if (CACHE_SYNC(&ca->set->sb) &&
-		    (!fifo_empty(&ca->free_inc) ||
-		     ca->need_save_prio > 64))
+		if (CACHE_SYNC(&ca->set->sb)) {
+			/*
+			 * This could deadlock if an allocation with a btree
+			 * node locked ever blocked - having the btree node
+			 * locked would block garbage collection, but here we're
+			 * waiting on garbage collection before we invalidate
+			 * and free anything.
+			 *
+			 * But this should be safe since the btree code always
+			 * uses btree_check_reserve() before allocating now, and
+			 * if it fails it blocks without btree nodes locked.
+			 */
+			if (!fifo_full(&ca->free_inc))
+				goto retry_invalidate;
+
 			bch_prio_write(ca);
+		}
 	}
 }
 
+/* Allocation */
+
 long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 {
 	DEFINE_WAIT(w);
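Taken together, the three allocator-thread hunks above leave the main loop looking roughly like this (a condensed paraphrase of the diff; locking, discards, and kthread stop handling omitted):

	while (1) {
		/* 1) Hand out what we have: drain free_inc onto the free[]
		 *    lists, optionally issuing discards along the way. */
		while (!fifo_empty(&ca->free_inc)) {
			long bucket;

			fifo_pop(&ca->free_inc, bucket);
			/* ...discard if enabled, push onto ca->free[]... */
		}

retry_invalidate:
		/* 2) Refill free_inc from buckets GC says are reclaimable;
		 *    a full GC pass may be needed first. */
		allocator_wait(ca, ca->set->gc_mark_valid &&
			       !ca->invalidate_needs_gc);
		invalidate_buckets(ca);

		/* 3) Persist the new gens/prios before any of these buckets
		 *    can be reused; keep invalidating until free_inc is full
		 *    so each bch_prio_write() covers a full batch. */
		if (CACHE_SYNC(&ca->set->sb)) {
			if (!fifo_full(&ca->free_inc))
				goto retry_invalidate;

			bch_prio_write(ca);
		}
	}

With the unused list gone, there is exactly one way for a bucket to reach the free lists, and it always passes through a prios/gens write first.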
@@ -447,8 +423,6 @@ out:
 			BUG_ON(i == r);
 		fifo_for_each(i, &ca->free_inc, iter)
 			BUG_ON(i == r);
-		fifo_for_each(i, &ca->unused, iter)
-			BUG_ON(i == r);
 	}
 
 	b = ca->buckets + r;
@@ -470,17 +444,19 @@ out:
 	return r;
 }
 
+void __bch_bucket_free(struct cache *ca, struct bucket *b)
+{
+	SET_GC_MARK(b, 0);
+	SET_GC_SECTORS_USED(b, 0);
+}
+
 void bch_bucket_free(struct cache_set *c, struct bkey *k)
 {
 	unsigned i;
 
-	for (i = 0; i < KEY_PTRS(k); i++) {
-		struct bucket *b = PTR_BUCKET(c, k, i);
-
-		SET_GC_MARK(b, 0);
-		SET_GC_SECTORS_USED(b, 0);
-		bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
-	}
+	for (i = 0; i < KEY_PTRS(k); i++)
+		__bch_bucket_free(PTR_CACHE(c, k, i),
+				  PTR_BUCKET(c, k, i));
 }
 
 int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
drivers/md/bcache/bcache.h
@@ -195,7 +195,6 @@ struct bucket {
 	atomic_t	pin;
 	uint16_t	prio;
 	uint8_t		gen;
-	uint8_t		disk_gen;
 	uint8_t		last_gc; /* Most out of date gen in the btree */
 	uint8_t		gc_gen;
 	uint16_t	gc_mark; /* Bitfield used by GC. See below for field */
@@ -426,14 +425,9 @@ struct cache {
 	 * their new gen to disk. After prio_write() finishes writing the new
 	 * gens/prios, they'll be moved to the free list (and possibly discarded
 	 * in the process)
-	 *
-	 * unused: GC found nothing pointing into these buckets (possibly
-	 * because all the data they contained was overwritten), so we only
-	 * need to discard them before they can be moved to the free list.
 	 */
 	DECLARE_FIFO(long, free)[RESERVE_NR];
 	DECLARE_FIFO(long, free_inc);
-	DECLARE_FIFO(long, unused);
 
 	size_t			fifo_last_bucket;
@@ -442,12 +436,6 @@ struct cache {
 
 	DECLARE_HEAP(struct bucket *, heap);
 
-	/*
-	 * max(gen - disk_gen) for all buckets. When it gets too big we have to
-	 * call prio_write() to keep gens from wrapping.
-	 */
-	uint8_t			need_save_prio;
-
 	/*
 	 * If nonzero, we know we aren't going to find any buckets to invalidate
 	 * until a gc finishes - otherwise we could pointlessly burn a ton of
@@ -848,9 +836,6 @@ static inline bool cached_dev_get(struct cached_dev *dc)
 /*
  * bucket_gc_gen() returns the difference between the bucket's current gen and
  * the oldest gen of any pointer into that bucket in the btree (last_gc).
- *
- * bucket_disk_gen() returns the difference between the current gen and the gen
- * on disk; they're both used to make sure gens don't wrap around.
 */
 
 static inline uint8_t bucket_gc_gen(struct bucket *b)
@@ -858,13 +843,7 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
 	return b->gen - b->last_gc;
 }
 
-static inline uint8_t bucket_disk_gen(struct bucket *b)
-{
-	return b->gen - b->disk_gen;
-}
-
 #define BUCKET_GC_GEN_MAX	96U
-#define BUCKET_DISK_GEN_MAX	64U
 
 #define kobj_attribute_write(n, fn)					\
 	static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
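With bucket_disk_gen() gone, bucket_gc_gen() is the only remaining guard against generation wraparound. A standalone illustration (toy values, not kernel code) of why the 8-bit gens are compared by subtraction and why their distance must stay capped:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t gen = 250, last_gc = 200;	/* hypothetical gen values */

	/* Unsigned subtraction is wrap-safe: the distance is correct
	 * modulo 256... */
	printf("%u\n", (uint8_t)(gen - last_gc));	/* prints 50 */

	gen += 20;	/* 250 + 20 wraps around to 14 */
	printf("%u\n", (uint8_t)(gen - last_gc));	/* still correct: 70 */

	/* ...but only while the true distance stays under 256, which is why
	 * the allocator refuses to invalidate a bucket once bucket_gc_gen()
	 * would exceed BUCKET_GC_GEN_MAX (96) and instead waits for GC to
	 * advance last_gc. */
	return 0;
}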
@@ -897,11 +876,14 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
 
 uint8_t bch_inc_gen(struct cache *, struct bucket *);
 void bch_rescale_priorities(struct cache_set *, int);
-bool bch_bucket_add_unused(struct cache *, struct bucket *);
 
-long bch_bucket_alloc(struct cache *, unsigned, bool);
+bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
+void __bch_invalidate_one_bucket(struct cache *, struct bucket *);
+
+void __bch_bucket_free(struct cache *, struct bucket *);
 void bch_bucket_free(struct cache_set *, struct bkey *);
+
+long bch_bucket_alloc(struct cache *, unsigned, bool);
 int __bch_bucket_alloc_set(struct cache_set *, unsigned,
 			   struct bkey *, int, bool);
 int bch_bucket_alloc_set(struct cache_set *, unsigned,
drivers/md/bcache/btree.c
@@ -1641,7 +1641,7 @@ static void btree_gc_start(struct cache_set *c)
 	mutex_unlock(&c->bucket_lock);
 }
 
-size_t bch_btree_gc_finish(struct cache_set *c)
+static size_t bch_btree_gc_finish(struct cache_set *c)
 {
 	size_t available = 0;
 	struct bucket *b;
@@ -1703,9 +1703,6 @@ size_t bch_btree_gc_finish(struct cache_set *c)
 
 			if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE)
 				available++;
-
-			if (!GC_MARK(b))
-				bch_bucket_add_unused(ca, b);
 		}
 	}
 
@@ -1836,6 +1833,42 @@ int bch_btree_check(struct cache_set *c)
 	return btree_root(check_recurse, c, &op);
 }
 
+void bch_initial_gc_finish(struct cache_set *c)
+{
+	struct cache *ca;
+	struct bucket *b;
+	unsigned i;
+
+	bch_btree_gc_finish(c);
+
+	mutex_lock(&c->bucket_lock);
+
+	/*
+	 * We need to put some unused buckets directly on the prio freelist in
+	 * order to get the allocator thread started - it needs freed buckets in
+	 * order to rewrite the prios and gens, and it needs to rewrite prios
+	 * and gens in order to free buckets.
+	 *
+	 * This is only safe for buckets that have no live data in them, which
+	 * there should always be some of.
+	 */
+	for_each_cache(ca, c, i) {
+		for_each_bucket(b, ca) {
+			if (fifo_full(&ca->free[RESERVE_PRIO]))
+				break;
+
+			if (bch_can_invalidate_bucket(ca, b) &&
+			    !GC_MARK(b)) {
+				__bch_invalidate_one_bucket(ca, b);
+				fifo_push(&ca->free[RESERVE_PRIO],
+					  b - ca->buckets);
+			}
+		}
+	}
+
+	mutex_unlock(&c->bucket_lock);
+}
+
 /* Btree insertion */
 
 static bool btree_insert_key(struct btree *b, struct bkey *k,
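The comment in the hunk above describes a bootstrap cycle; restated as a compilable sketch (the stub names are hypothetical stand-ins, the real logic is bch_initial_gc_finish() itself):

struct cache_set;

/* Empty stubs so the sketch compiles; each stands for the kernel step
 * named in its comment. */
static void compute_gc_marks(struct cache_set *c)  { (void)c; }	/* bch_btree_gc_finish() */
static void seed_prio_reserve(struct cache_set *c) { (void)c; }	/* fifo_push() onto free[RESERVE_PRIO] */

/* The allocator can only free buckets after rewriting prios/gens, but a
 * prios/gens write itself consumes buckets. Seeding the prio reserve with
 * buckets GC proved empty (!GC_MARK(b)) breaks the cycle at startup. */
void initial_gc_finish_sketch(struct cache_set *c)
{
	compute_gc_marks(c);	/* find buckets with no live data */
	seed_prio_reserve(c);	/* give the allocator its first free buckets */
}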
drivers/md/bcache/btree.h
@@ -252,7 +252,7 @@ int bch_btree_insert(struct cache_set *, struct keylist *,
 		     atomic_t *, struct bkey *);
 
 int bch_gc_thread_start(struct cache_set *);
-size_t bch_btree_gc_finish(struct cache_set *);
+void bch_initial_gc_finish(struct cache_set *);
 void bch_moving_gc(struct cache_set *);
 int bch_btree_check(struct cache_set *);
 void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
drivers/md/bcache/super.c
@@ -541,9 +541,6 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw)
 	closure_sync(cl);
 }
 
-#define buckets_free(c)	"free %zu, free_inc %zu, unused %zu",		\
-	fifo_used(&c->free), fifo_used(&c->free_inc), fifo_used(&c->unused)
-
 void bch_prio_write(struct cache *ca)
 {
 	int i;
@@ -554,10 +551,6 @@ void bch_prio_write(struct cache *ca)
 
 	lockdep_assert_held(&ca->set->bucket_lock);
 
-	for (b = ca->buckets;
-	     b < ca->buckets + ca->sb.nbuckets; b++)
-		b->disk_gen = b->gen;
-
 	ca->disk_buckets->seq++;
 
 	atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
@@ -601,14 +594,17 @@ void bch_prio_write(struct cache *ca)
 
 	mutex_lock(&ca->set->bucket_lock);
 
-	ca->need_save_prio = 0;
-
 	/*
 	 * Don't want the old priorities to get garbage collected until after we
 	 * finish writing the new ones, and they're journalled
 	 */
-	for (i = 0; i < prio_buckets(ca); i++)
+	for (i = 0; i < prio_buckets(ca); i++) {
+		if (ca->prio_last_buckets[i])
+			__bch_bucket_free(ca,
+				&ca->buckets[ca->prio_last_buckets[i]]);
+
 		ca->prio_last_buckets[i] = ca->prio_buckets[i];
+	}
 }
 
 static void prio_read(struct cache *ca, uint64_t bucket)
@@ -639,7 +635,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
 		}
 
 		b->prio = le16_to_cpu(d->prio);
-		b->gen = b->disk_gen = b->last_gc = b->gc_gen = d->gen;
+		b->gen = b->last_gc = b->gc_gen = d->gen;
 	}
 }
 
@@ -1606,7 +1602,7 @@ static void run_cache_set(struct cache_set *c)
 			goto err;
 
 		bch_journal_mark(c, &journal);
-		bch_btree_gc_finish(c);
+		bch_initial_gc_finish(c);
 		pr_debug("btree_check() done");
 
 		/*
@@ -1648,7 +1644,7 @@ static void run_cache_set(struct cache_set *c)
 			ca->sb.d[j] = ca->sb.first_bucket + j;
 		}
 
-		bch_btree_gc_finish(c);
+		bch_initial_gc_finish(c);
 
 		err = "error starting allocator thread";
 		for_each_cache(ca, c, i)
@@ -1794,7 +1790,6 @@ void bch_cache_release(struct kobject *kobj)
 	vfree(ca->buckets);
 
 	free_heap(&ca->heap);
-	free_fifo(&ca->unused);
 	free_fifo(&ca->free_inc);
 
 	for (i = 0; i < RESERVE_NR; i++)
@@ -1831,7 +1826,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
 	    !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
 	    !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
 	    !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
-	    !init_fifo(&ca->unused, free << 2, GFP_KERNEL) ||
 	    !init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
 	    !(ca->buckets = vzalloc(sizeof(struct bucket) *
 				    ca->sb.nbuckets)) ||
include/trace/events/bcache.h
@@ -446,7 +446,6 @@ TRACE_EVENT(bcache_alloc_fail,
 		__field(dev_t,		dev			)
 		__field(unsigned,	free			)
 		__field(unsigned,	free_inc		)
-		__field(unsigned,	unused			)
 		__field(unsigned,	blocked			)
 	),
 
@@ -454,13 +453,12 @@ TRACE_EVENT(bcache_alloc_fail,
 		__entry->dev		= ca->bdev->bd_dev;
 		__entry->free		= fifo_used(&ca->free[reserve]);
 		__entry->free_inc	= fifo_used(&ca->free_inc);
-		__entry->unused		= fifo_used(&ca->unused);
 		__entry->blocked	= atomic_read(&ca->set->prio_blocked);
 	),
 
-	TP_printk("alloc fail %d,%d free %u free_inc %u unused %u blocked %u",
+	TP_printk("alloc fail %d,%d free %u free_inc %u blocked %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->free,
-		  __entry->free_inc, __entry->unused, __entry->blocked)
+		  __entry->free_inc, __entry->blocked)
 );
 
 /* Background writeback */