From afba2bc036b0ed983bef6bd7c00c827e97d1ba31 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:17 +0900 Subject: [PATCH] btrfs: zoned: implement active zone tracking Add zone_is_active flag to btrfs_block_group. This flag indicates the underlying zones are all active. Such zone active block groups are tracked by fs_info->active_bg_list. btrfs_dev_{set,clear}_active_zone() take responsibility for the underlying device part. They set/clear the bitmap to indicate zone activeness and count the number of zones we can activate left. btrfs_zone_{activate,finish}() take responsibility for the logical part and the list management. In addition, btrfs_zone_finish() wait for any writes on it and send REQ_OP_ZONE_FINISH to the zone. Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 11 ++ fs/btrfs/block-group.h | 2 + fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 2 + fs/btrfs/free-space-cache.c | 5 +- fs/btrfs/zoned.c | 193 ++++++++++++++++++++++++++++++++++++ fs/btrfs/zoned.h | 12 +++ 7 files changed, 226 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 4f8f04ed911e..8e7b74fa3fc8 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1896,6 +1896,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache( INIT_LIST_HEAD(&cache->discard_list); INIT_LIST_HEAD(&cache->dirty_list); INIT_LIST_HEAD(&cache->io_list); + INIT_LIST_HEAD(&cache->active_bg_list); btrfs_init_free_space_ctl(cache, cache->free_space_ctl); atomic_set(&cache->frozen, 0); mutex_init(&cache->free_space_lock); @@ -3842,6 +3843,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) } spin_unlock(&info->unused_bgs_lock); + spin_lock(&info->zone_active_bgs_lock); + while (!list_empty(&info->zone_active_bgs)) { + block_group = list_first_entry(&info->zone_active_bgs, + struct btrfs_block_group, + active_bg_list); + list_del_init(&block_group->active_bg_list); + btrfs_put_block_group(block_group); + } + spin_unlock(&info->zone_active_bgs_lock); + spin_lock(&info->block_group_cache_lock); while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { block_group = rb_entry(n, struct btrfs_block_group, diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 265db2c316d3..f751b802b173 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -98,6 +98,7 @@ struct btrfs_block_group { unsigned int to_copy:1; unsigned int relocating_repair:1; unsigned int chunk_item_inserted:1; + unsigned int zone_is_active:1; int disk_cache_state; @@ -205,6 +206,7 @@ struct btrfs_block_group { u64 zone_capacity; u64 meta_write_pointer; struct map_lookup *physical_map; + struct list_head active_bg_list; }; static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f0174c62274..bfba85d9f862 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1018,6 +1018,9 @@ struct btrfs_fs_info { spinlock_t treelog_bg_lock; u64 treelog_bg; + spinlock_t zone_active_bgs_lock; + struct list_head zone_active_bgs; + #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; struct rb_root block_tree; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f755d02dc088..41ea50f48cfe 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2884,6 +2884,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); spin_lock_init(&fs_info->treelog_bg_lock); + spin_lock_init(&fs_info->zone_active_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reclaim_bgs_lock); @@ -2897,6 +2898,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_LIST_HEAD(&fs_info->unused_bgs); INIT_LIST_HEAD(&fs_info->reclaim_bgs); + INIT_LIST_HEAD(&fs_info->zone_active_bgs); #ifdef CONFIG_BTRFS_DEBUG INIT_LIST_HEAD(&fs_info->allocated_roots); INIT_LIST_HEAD(&fs_info->allocated_ebs); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9ce0f9b23812..0d26819b1cf6 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2763,8 +2763,9 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group, * out the free space after the allocation offset. */ if (btrfs_is_zoned(fs_info)) { - btrfs_info(fs_info, "free space %llu", - block_group->zone_capacity - block_group->alloc_offset); + btrfs_info(fs_info, "free space %llu active %d", + block_group->zone_capacity - block_group->alloc_offset, + block_group->zone_is_active); return; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 4c89ac02843e..614499a83e8c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -989,6 +989,41 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start, return pos; } +static bool btrfs_dev_set_active_zone(struct btrfs_device *device, u64 pos) +{ + struct btrfs_zoned_device_info *zone_info = device->zone_info; + unsigned int zno = (pos >> zone_info->zone_size_shift); + + /* We can use any number of zones */ + if (zone_info->max_active_zones == 0) + return true; + + if (!test_bit(zno, zone_info->active_zones)) { + /* Active zone left? */ + if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0) + return false; + if (test_and_set_bit(zno, zone_info->active_zones)) { + /* Someone already set the bit */ + atomic_inc(&zone_info->active_zones_left); + } + } + + return true; +} + +static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos) +{ + struct btrfs_zoned_device_info *zone_info = device->zone_info; + unsigned int zno = (pos >> zone_info->zone_size_shift); + + /* We can use any number of zones */ + if (zone_info->max_active_zones == 0) + return; + + if (test_and_clear_bit(zno, zone_info->active_zones)) + atomic_inc(&zone_info->active_zones_left); +} + int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical, u64 length, u64 *bytes) { @@ -1004,6 +1039,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical, *bytes = length; while (length) { btrfs_dev_set_zone_empty(device, physical); + btrfs_dev_clear_active_zone(device, physical); physical += device->zone_info->zone_size; length -= device->zone_info->zone_size; } @@ -1656,3 +1692,160 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, return device; } + +/** + * Activate block group and underlying device zones + * + * @block_group: the block group to activate + * + * Return: true on success, false otherwise + */ +bool btrfs_zone_activate(struct btrfs_block_group *block_group) +{ + struct btrfs_fs_info *fs_info = block_group->fs_info; + struct map_lookup *map; + struct btrfs_device *device; + u64 physical; + bool ret; + + if (!btrfs_is_zoned(block_group->fs_info)) + return true; + + map = block_group->physical_map; + /* Currently support SINGLE profile only */ + ASSERT(map->num_stripes == 1); + device = map->stripes[0].dev; + physical = map->stripes[0].physical; + + if (device->zone_info->max_active_zones == 0) + return true; + + spin_lock(&block_group->lock); + + if (block_group->zone_is_active) { + ret = true; + goto out_unlock; + } + + /* No space left */ + if (block_group->alloc_offset == block_group->zone_capacity) { + ret = false; + goto out_unlock; + } + + if (!btrfs_dev_set_active_zone(device, physical)) { + /* Cannot activate the zone */ + ret = false; + goto out_unlock; + } + + /* Successfully activated all the zones */ + block_group->zone_is_active = 1; + + spin_unlock(&block_group->lock); + + /* For the active block group list */ + btrfs_get_block_group(block_group); + + spin_lock(&fs_info->zone_active_bgs_lock); + ASSERT(list_empty(&block_group->active_bg_list)); + list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); + spin_unlock(&fs_info->zone_active_bgs_lock); + + return true; + +out_unlock: + spin_unlock(&block_group->lock); + return ret; +} + +int btrfs_zone_finish(struct btrfs_block_group *block_group) +{ + struct btrfs_fs_info *fs_info = block_group->fs_info; + struct map_lookup *map; + struct btrfs_device *device; + u64 physical; + int ret = 0; + + if (!btrfs_is_zoned(fs_info)) + return 0; + + map = block_group->physical_map; + /* Currently support SINGLE profile only */ + ASSERT(map->num_stripes == 1); + + device = map->stripes[0].dev; + physical = map->stripes[0].physical; + + if (device->zone_info->max_active_zones == 0) + return 0; + + spin_lock(&block_group->lock); + if (!block_group->zone_is_active) { + spin_unlock(&block_group->lock); + return 0; + } + + /* Check if we have unwritten allocated space */ + if ((block_group->flags & + (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) && + block_group->alloc_offset > block_group->meta_write_pointer) { + spin_unlock(&block_group->lock); + return -EAGAIN; + } + spin_unlock(&block_group->lock); + + ret = btrfs_inc_block_group_ro(block_group, false); + if (ret) + return ret; + + /* Ensure all writes in this block group finish */ + btrfs_wait_block_group_reservations(block_group); + /* No need to wait for NOCOW writers. Zoned mode does not allow that. */ + btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start, + block_group->length); + + spin_lock(&block_group->lock); + + /* + * Bail out if someone already deactivated the block group, or + * allocated space is left in the block group. + */ + if (!block_group->zone_is_active) { + spin_unlock(&block_group->lock); + btrfs_dec_block_group_ro(block_group); + return 0; + } + + if (block_group->reserved) { + spin_unlock(&block_group->lock); + btrfs_dec_block_group_ro(block_group); + return -EAGAIN; + } + + block_group->zone_is_active = 0; + block_group->alloc_offset = block_group->zone_capacity; + block_group->free_space_ctl->free_space = 0; + btrfs_clear_treelog_bg(block_group); + spin_unlock(&block_group->lock); + + ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, + physical >> SECTOR_SHIFT, + device->zone_info->zone_size >> SECTOR_SHIFT, + GFP_NOFS); + btrfs_dec_block_group_ro(block_group); + + if (!ret) { + btrfs_dev_clear_active_zone(device, physical); + + spin_lock(&fs_info->zone_active_bgs_lock); + ASSERT(!list_empty(&block_group->active_bg_list)); + list_del_init(&block_group->active_bg_list); + spin_unlock(&fs_info->zone_active_bgs_lock); + + /* For active_bg_list */ + btrfs_put_block_group(block_group); + } + + return ret; +} diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 48628782e4b8..2345ecfa1805 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -69,6 +69,8 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical, u64 physical_start, u64 physical_pos); struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, u64 logical, u64 length); +bool btrfs_zone_activate(struct btrfs_block_group *block_group); +int btrfs_zone_finish(struct btrfs_block_group *block_group); #else /* CONFIG_BLK_DEV_ZONED */ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) @@ -204,6 +206,16 @@ static inline struct btrfs_device *btrfs_zoned_get_device( return ERR_PTR(-EOPNOTSUPP); } +static inline bool btrfs_zone_activate(struct btrfs_block_group *block_group) +{ + return true; +} + +static inline int btrfs_zone_finish(struct btrfs_block_group *block_group) +{ + return 0; +} + #endif static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)