btrfs: zoned: implement active zone tracking

Add a zone_is_active flag to btrfs_block_group. This flag indicates that
the underlying zones are all active. Such zone-active block groups are
tracked on the fs_info->zone_active_bgs list, linked through
block_group->active_bg_list.

btrfs_dev_{set,clear}_active_zone() take responsibility for the underlying
device part. They set/clear the bitmap that records zone activeness and
keep count of how many more zones can still be activated.

btrfs_zone_{activate,finish}() take responsibility for the logical part and
the list management. In addition, btrfs_zone_finish() waits for any writes
to the block group and sends REQ_OP_ZONE_FINISH to the zone.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Naohiro Aota 2021-08-19 21:19:17 +09:00 коммит произвёл David Sterba
Родитель dafc340dbd
Коммит afba2bc036
7 изменённых файлов: 226 добавлений и 2 удалений

Просмотреть файл

@ -1896,6 +1896,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
INIT_LIST_HEAD(&cache->discard_list); INIT_LIST_HEAD(&cache->discard_list);
INIT_LIST_HEAD(&cache->dirty_list); INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->io_list); INIT_LIST_HEAD(&cache->io_list);
INIT_LIST_HEAD(&cache->active_bg_list);
btrfs_init_free_space_ctl(cache, cache->free_space_ctl); btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
atomic_set(&cache->frozen, 0); atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock); mutex_init(&cache->free_space_lock);
@ -3842,6 +3843,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
} }
spin_unlock(&info->unused_bgs_lock); spin_unlock(&info->unused_bgs_lock);
spin_lock(&info->zone_active_bgs_lock);
while (!list_empty(&info->zone_active_bgs)) {
block_group = list_first_entry(&info->zone_active_bgs,
struct btrfs_block_group,
active_bg_list);
list_del_init(&block_group->active_bg_list);
btrfs_put_block_group(block_group);
}
spin_unlock(&info->zone_active_bgs_lock);
spin_lock(&info->block_group_cache_lock); spin_lock(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
block_group = rb_entry(n, struct btrfs_block_group, block_group = rb_entry(n, struct btrfs_block_group,

Просмотреть файл

@ -98,6 +98,7 @@ struct btrfs_block_group {
unsigned int to_copy:1; unsigned int to_copy:1;
unsigned int relocating_repair:1; unsigned int relocating_repair:1;
unsigned int chunk_item_inserted:1; unsigned int chunk_item_inserted:1;
unsigned int zone_is_active:1;
int disk_cache_state; int disk_cache_state;
@ -205,6 +206,7 @@ struct btrfs_block_group {
u64 zone_capacity; u64 zone_capacity;
u64 meta_write_pointer; u64 meta_write_pointer;
struct map_lookup *physical_map; struct map_lookup *physical_map;
struct list_head active_bg_list;
}; };
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group) static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)

Просмотреть файл

@ -1018,6 +1018,9 @@ struct btrfs_fs_info {
spinlock_t treelog_bg_lock; spinlock_t treelog_bg_lock;
u64 treelog_bg; u64 treelog_bg;
spinlock_t zone_active_bgs_lock;
struct list_head zone_active_bgs;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY #ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock; spinlock_t ref_verify_lock;
struct rb_root block_tree; struct rb_root block_tree;

Просмотреть файл

@ -2884,6 +2884,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock); spin_lock_init(&fs_info->unused_bgs_lock);
spin_lock_init(&fs_info->treelog_bg_lock); spin_lock_init(&fs_info->treelog_bg_lock);
spin_lock_init(&fs_info->zone_active_bgs_lock);
rwlock_init(&fs_info->tree_mod_log_lock); rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->unused_bg_unpin_mutex);
mutex_init(&fs_info->reclaim_bgs_lock); mutex_init(&fs_info->reclaim_bgs_lock);
@ -2897,6 +2898,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
INIT_LIST_HEAD(&fs_info->unused_bgs); INIT_LIST_HEAD(&fs_info->unused_bgs);
INIT_LIST_HEAD(&fs_info->reclaim_bgs); INIT_LIST_HEAD(&fs_info->reclaim_bgs);
INIT_LIST_HEAD(&fs_info->zone_active_bgs);
#ifdef CONFIG_BTRFS_DEBUG #ifdef CONFIG_BTRFS_DEBUG
INIT_LIST_HEAD(&fs_info->allocated_roots); INIT_LIST_HEAD(&fs_info->allocated_roots);
INIT_LIST_HEAD(&fs_info->allocated_ebs); INIT_LIST_HEAD(&fs_info->allocated_ebs);

Просмотреть файл

@ -2763,8 +2763,9 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
* out the free space after the allocation offset. * out the free space after the allocation offset.
*/ */
if (btrfs_is_zoned(fs_info)) { if (btrfs_is_zoned(fs_info)) {
btrfs_info(fs_info, "free space %llu", btrfs_info(fs_info, "free space %llu active %d",
block_group->zone_capacity - block_group->alloc_offset); block_group->zone_capacity - block_group->alloc_offset,
block_group->zone_is_active);
return; return;
} }

Просмотреть файл

@ -989,6 +989,41 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
return pos; return pos;
} }
/*
 * Mark the zone containing @pos as active on @device.
 *
 * When the device limits the number of active zones (max_active_zones is
 * non-zero), one slot is taken from zone_info->active_zones_left and the
 * zone's bit is set in zone_info->active_zones. A zone whose bit is already
 * set does not consume another slot.
 *
 * Return: true if the zone is active on return (or the device has no limit),
 * false if no active-zone slot is left.
 */
static bool btrfs_dev_set_active_zone(struct btrfs_device *device, u64 pos)
{
	struct btrfs_zoned_device_info *zinfo = device->zone_info;
	unsigned int zone_nr = pos >> zinfo->zone_size_shift;

	/* A limit of zero means any number of zones may be used. */
	if (zinfo->max_active_zones == 0)
		return true;

	/* Bit already set: nothing more to account for. */
	if (test_bit(zone_nr, zinfo->active_zones))
		return true;

	/* Take one of the remaining slots, if any is left. */
	if (atomic_dec_if_positive(&zinfo->active_zones_left) < 0)
		return false;

	/* Someone else set the bit in the meantime: hand the slot back. */
	if (test_and_set_bit(zone_nr, zinfo->active_zones))
		atomic_inc(&zinfo->active_zones_left);

	return true;
}
/*
 * Drop the active mark of the zone containing @pos on @device.
 *
 * Does nothing when the device has no active-zone limit. Otherwise, if the
 * zone's bit was set in zone_info->active_zones, it is cleared and one slot
 * is returned to zone_info->active_zones_left.
 */
static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos)
{
	struct btrfs_zoned_device_info *zinfo = device->zone_info;
	unsigned int zone_nr = pos >> zinfo->zone_size_shift;

	/* Only give a slot back if this call is the one that cleared the bit. */
	if (zinfo->max_active_zones != 0 &&
	    test_and_clear_bit(zone_nr, zinfo->active_zones))
		atomic_inc(&zinfo->active_zones_left);
}
int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical, int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
u64 length, u64 *bytes) u64 length, u64 *bytes)
{ {
@ -1004,6 +1039,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
*bytes = length; *bytes = length;
while (length) { while (length) {
btrfs_dev_set_zone_empty(device, physical); btrfs_dev_set_zone_empty(device, physical);
btrfs_dev_clear_active_zone(device, physical);
physical += device->zone_info->zone_size; physical += device->zone_info->zone_size;
length -= device->zone_info->zone_size; length -= device->zone_info->zone_size;
} }
@ -1656,3 +1692,160 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
return device; return device;
} }
/**
 * Activate block group and underlying device zones
 *
 * @block_group: the block group to activate
 *
 * On a non-zoned filesystem, or on a device without an active-zone limit,
 * this reports success without doing anything. Otherwise a newly activated
 * block group is flagged zone_is_active and appended to
 * fs_info->zone_active_bgs, with an extra reference held for the list.
 *
 * Return: true on success, false otherwise
 */
bool btrfs_zone_activate(struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct map_lookup *map;
	struct btrfs_device *device;
	u64 physical;

	if (!btrfs_is_zoned(fs_info))
		return true;

	map = block_group->physical_map;
	/* Currently support SINGLE profile only */
	ASSERT(map->num_stripes == 1);
	device = map->stripes[0].dev;
	physical = map->stripes[0].physical;

	if (device->zone_info->max_active_zones == 0)
		return true;

	spin_lock(&block_group->lock);

	/* Already active: nothing to do. */
	if (block_group->zone_is_active) {
		spin_unlock(&block_group->lock);
		return true;
	}

	/*
	 * Fail when no space is left in the block group, or when the device
	 * cannot activate the zone. The device is only asked if space is left.
	 */
	if (block_group->alloc_offset == block_group->zone_capacity ||
	    !btrfs_dev_set_active_zone(device, physical)) {
		spin_unlock(&block_group->lock);
		return false;
	}

	/* Successfully activated all the zones */
	block_group->zone_is_active = 1;
	spin_unlock(&block_group->lock);

	/* For the active block group list */
	btrfs_get_block_group(block_group);

	spin_lock(&fs_info->zone_active_bgs_lock);
	ASSERT(list_empty(&block_group->active_bg_list));
	list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
	spin_unlock(&fs_info->zone_active_bgs_lock);

	return true;
}
/**
 * Finish the zone of a block group and stop tracking it as active
 *
 * @block_group: the block group to finish
 *
 * Nothing is done on a non-zoned filesystem, on a device without an
 * active-zone limit, or for a block group that is not active. Otherwise the
 * block group is made read-only while its reservations and the ordered
 * extents in its range are waited for, it is marked inactive and full,
 * REQ_OP_ZONE_FINISH is sent to the zone and, on success, the device's
 * active-zone accounting and the fs_info->zone_active_bgs entry (together
 * with its reference) are released.
 *
 * Return: 0 on success or when there was nothing to do, -EAGAIN when a
 * metadata/system block group still has allocated but unwritten space or
 * when block_group->reserved is non-zero, otherwise the error returned by
 * btrfs_inc_block_group_ro() or blkdev_zone_mgmt().
 */
int btrfs_zone_finish(struct btrfs_block_group *block_group)
{
	const u64 meta_types = BTRFS_BLOCK_GROUP_METADATA |
			       BTRFS_BLOCK_GROUP_SYSTEM;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct map_lookup *map;
	struct btrfs_device *device;
	u64 physical;
	int ret = 0;

	if (!btrfs_is_zoned(fs_info))
		return 0;

	map = block_group->physical_map;
	/* Currently support SINGLE profile only */
	ASSERT(map->num_stripes == 1);
	device = map->stripes[0].dev;
	physical = map->stripes[0].physical;

	if (device->zone_info->max_active_zones == 0)
		return 0;

	spin_lock(&block_group->lock);

	/* Not active: nothing to finish, ret is still 0. */
	if (!block_group->zone_is_active)
		goto out_unlock;

	/* Check if we have unwritten allocated space */
	if ((block_group->flags & meta_types) &&
	    block_group->alloc_offset > block_group->meta_write_pointer) {
		ret = -EAGAIN;
		goto out_unlock;
	}

	spin_unlock(&block_group->lock);

	ret = btrfs_inc_block_group_ro(block_group, false);
	if (ret)
		return ret;

	/* Ensure all writes in this block group finish */
	btrfs_wait_block_group_reservations(block_group);
	/* No need to wait for NOCOW writers. Zoned mode does not allow that. */
	btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
				 block_group->length);

	spin_lock(&block_group->lock);

	/*
	 * Bail out if someone already deactivated the block group, or
	 * allocated space is left in the block group.
	 */
	if (!block_group->zone_is_active)
		goto out_unlock_ro;	/* ret is 0 here */

	if (block_group->reserved) {
		ret = -EAGAIN;
		goto out_unlock_ro;
	}

	/* Mark the block group inactive and completely used up. */
	block_group->zone_is_active = 0;
	block_group->alloc_offset = block_group->zone_capacity;
	block_group->free_space_ctl->free_space = 0;
	btrfs_clear_treelog_bg(block_group);
	spin_unlock(&block_group->lock);

	ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
			       physical >> SECTOR_SHIFT,
			       device->zone_info->zone_size >> SECTOR_SHIFT,
			       GFP_NOFS);
	btrfs_dec_block_group_ro(block_group);

	/*
	 * NOTE(review): on failure the block group is already flagged
	 * inactive but stays on zone_active_bgs and keeps its device slot;
	 * confirm this is the intended error-path state.
	 */
	if (ret)
		return ret;

	btrfs_dev_clear_active_zone(device, physical);

	spin_lock(&fs_info->zone_active_bgs_lock);
	ASSERT(!list_empty(&block_group->active_bg_list));
	list_del_init(&block_group->active_bg_list);
	spin_unlock(&fs_info->zone_active_bgs_lock);

	/* For active_bg_list */
	btrfs_put_block_group(block_group);

	return 0;

out_unlock_ro:
	spin_unlock(&block_group->lock);
	btrfs_dec_block_group_ro(block_group);
	return ret;

out_unlock:
	spin_unlock(&block_group->lock);
	return ret;
}

Просмотреть файл

@ -69,6 +69,8 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
u64 physical_start, u64 physical_pos); u64 physical_start, u64 physical_pos);
struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
u64 logical, u64 length); u64 logical, u64 length);
bool btrfs_zone_activate(struct btrfs_block_group *block_group);
int btrfs_zone_finish(struct btrfs_block_group *block_group);
#else /* CONFIG_BLK_DEV_ZONED */ #else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone) struct blk_zone *zone)
@ -204,6 +206,16 @@ static inline struct btrfs_device *btrfs_zoned_get_device(
return ERR_PTR(-EOPNOTSUPP); return ERR_PTR(-EOPNOTSUPP);
} }
/*
 * Stub for builds without CONFIG_BLK_DEV_ZONED: there is no zone to
 * activate, so activation always reports success.
 */
static inline bool btrfs_zone_activate(struct btrfs_block_group *block_group)
{
	return true;
}
/*
 * Stub for builds without CONFIG_BLK_DEV_ZONED: there is no zone to
 * finish, so this always returns 0.
 */
static inline int btrfs_zone_finish(struct btrfs_block_group *block_group)
{
	return 0;
}
#endif #endif
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos) static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)