btrfs: zoned: implement active zone tracking

Add a zone_is_active flag to struct btrfs_block_group. This flag indicates
that the underlying zones are all active. Such zone-active block groups are
tracked by fs_info->zone_active_bgs.

btrfs_dev_{set,clear}_active_zone() take care of the underlying device part:
they set/clear the per-zone bitmap bit to indicate whether a zone is active
and keep count of how many zone activations are left on the device.

btrfs_zone_{activate,finish}() take care of the logical part and the list
management. In addition, btrfs_zone_finish() waits for all writes on the
block group to complete and sends REQ_OP_ZONE_FINISH to the zone.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Parent: dafc340dbd
Commit: afba2bc036
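The list management described in the message follows the usual btrfs pattern: the active list owns a reference to each block group on it, taken in btrfs_zone_activate() and dropped in btrfs_zone_finish(). A toy single-threaded userspace model of that protocol (illustrative names only, not btrfs APIs):

/*
 * Toy model of the refcount/list protocol: the active list holds its own
 * reference to a block group, taken on activation, dropped on finish.
 */
#include <assert.h>
#include <stdio.h>

struct bg {
	int refs;
	int on_active_list;
};

static void bg_get(struct bg *bg) { bg->refs++; }

static void bg_put(struct bg *bg)
{
	if (--bg->refs == 0)
		printf("last reference dropped: block group freed\n");
}

static void activate(struct bg *bg)
{
	bg_get(bg);		/* reference owned by the active list */
	bg->on_active_list = 1;
}

static void finish(struct bg *bg)
{
	assert(bg->on_active_list);
	bg->on_active_list = 0;
	bg_put(bg);		/* drop the list's reference */
}

int main(void)
{
	struct bg bg = { .refs = 1, .on_active_list = 0 };

	activate(&bg);
	finish(&bg);
	bg_put(&bg);		/* caller's own reference */
	return 0;
}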
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
@@ -1896,6 +1896,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
 	INIT_LIST_HEAD(&cache->discard_list);
 	INIT_LIST_HEAD(&cache->dirty_list);
 	INIT_LIST_HEAD(&cache->io_list);
+	INIT_LIST_HEAD(&cache->active_bg_list);
 	btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
 	atomic_set(&cache->frozen, 0);
 	mutex_init(&cache->free_space_lock);
@@ -3842,6 +3843,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 	}
 	spin_unlock(&info->unused_bgs_lock);
 
+	spin_lock(&info->zone_active_bgs_lock);
+	while (!list_empty(&info->zone_active_bgs)) {
+		block_group = list_first_entry(&info->zone_active_bgs,
+					       struct btrfs_block_group,
+					       active_bg_list);
+		list_del_init(&block_group->active_bg_list);
+		btrfs_put_block_group(block_group);
+	}
+	spin_unlock(&info->zone_active_bgs_lock);
+
 	spin_lock(&info->block_group_cache_lock);
 	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
 		block_group = rb_entry(n, struct btrfs_block_group,
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
@@ -98,6 +98,7 @@ struct btrfs_block_group {
 	unsigned int to_copy:1;
 	unsigned int relocating_repair:1;
 	unsigned int chunk_item_inserted:1;
+	unsigned int zone_is_active:1;
 
 	int disk_cache_state;
 
@@ -205,6 +206,7 @@ struct btrfs_block_group {
 	u64 zone_capacity;
 	u64 meta_write_pointer;
 	struct map_lookup *physical_map;
+	struct list_head active_bg_list;
 };
 
 static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
@@ -1018,6 +1018,9 @@ struct btrfs_fs_info {
 	spinlock_t treelog_bg_lock;
 	u64 treelog_bg;
 
+	spinlock_t zone_active_bgs_lock;
+	struct list_head zone_active_bgs;
+
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 	spinlock_t ref_verify_lock;
 	struct rb_root block_tree;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
@@ -2884,6 +2884,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	spin_lock_init(&fs_info->buffer_lock);
 	spin_lock_init(&fs_info->unused_bgs_lock);
 	spin_lock_init(&fs_info->treelog_bg_lock);
+	spin_lock_init(&fs_info->zone_active_bgs_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->unused_bg_unpin_mutex);
 	mutex_init(&fs_info->reclaim_bgs_lock);
@@ -2897,6 +2898,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
 	INIT_LIST_HEAD(&fs_info->unused_bgs);
 	INIT_LIST_HEAD(&fs_info->reclaim_bgs);
+	INIT_LIST_HEAD(&fs_info->zone_active_bgs);
 #ifdef CONFIG_BTRFS_DEBUG
 	INIT_LIST_HEAD(&fs_info->allocated_roots);
 	INIT_LIST_HEAD(&fs_info->allocated_ebs);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
@@ -2763,8 +2763,9 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
 	 * out the free space after the allocation offset.
 	 */
 	if (btrfs_is_zoned(fs_info)) {
-		btrfs_info(fs_info, "free space %llu",
-			   block_group->zone_capacity - block_group->alloc_offset);
+		btrfs_info(fs_info, "free space %llu active %d",
+			   block_group->zone_capacity - block_group->alloc_offset,
+			   block_group->zone_is_active);
 		return;
 	}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
@@ -989,6 +989,41 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
 	return pos;
 }
 
+static bool btrfs_dev_set_active_zone(struct btrfs_device *device, u64 pos)
+{
+	struct btrfs_zoned_device_info *zone_info = device->zone_info;
+	unsigned int zno = (pos >> zone_info->zone_size_shift);
+
+	/* We can use any number of zones */
+	if (zone_info->max_active_zones == 0)
+		return true;
+
+	if (!test_bit(zno, zone_info->active_zones)) {
+		/* Active zone left? */
+		if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0)
+			return false;
+		if (test_and_set_bit(zno, zone_info->active_zones)) {
+			/* Someone already set the bit */
+			atomic_inc(&zone_info->active_zones_left);
+		}
+	}
+
+	return true;
+}
+
+static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos)
+{
+	struct btrfs_zoned_device_info *zone_info = device->zone_info;
+	unsigned int zno = (pos >> zone_info->zone_size_shift);
+
+	/* We can use any number of zones */
+	if (zone_info->max_active_zones == 0)
+		return;
+
+	if (test_and_clear_bit(zno, zone_info->active_zones))
+		atomic_inc(&zone_info->active_zones_left);
+}
+
 int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
 			    u64 length, u64 *bytes)
 {
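Note the ordering in btrfs_dev_set_active_zone(): a slot is reserved from active_zones_left with atomic_dec_if_positive() before the bitmap bit is claimed, and the slot is returned if another thread claimed the bit first, so the counter never leaks. Below is a standalone userspace model of the same reserve-then-claim idea using C11 atomics (illustrative only; C11 has no exact atomic_dec_if_positive counterpart, so this sketch may transiently dip the counter below zero before rolling back, which the kernel primitive avoids):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_ZONES 8

static atomic_bool zone_active[NR_ZONES];
static atomic_int active_zones_left = 3;	/* e.g. max_active_zones = 3 */

static bool set_active_zone(unsigned int zno)
{
	if (atomic_load(&zone_active[zno]))
		return true;			/* already active */

	/* Reserve a slot first... */
	if (atomic_fetch_sub(&active_zones_left, 1) <= 0) {
		atomic_fetch_add(&active_zones_left, 1);	/* roll back */
		return false;
	}
	/* ...then claim the bit; if we raced and lost, return the slot. */
	if (atomic_exchange(&zone_active[zno], true))
		atomic_fetch_add(&active_zones_left, 1);
	return true;
}

static void clear_active_zone(unsigned int zno)
{
	if (atomic_exchange(&zone_active[zno], false))
		atomic_fetch_add(&active_zones_left, 1);
}

int main(void)
{
	for (unsigned int zno = 0; zno < 5; zno++)
		printf("zone %u: %s\n", zno,
		       set_active_zone(zno) ? "activated" : "no slot left");
	clear_active_zone(0);			/* frees one slot */
	printf("zone 4 retry: %s\n",
	       set_active_zone(4) ? "activated" : "no slot left");
	return 0;
}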
@@ -1004,6 +1039,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
 	*bytes = length;
 	while (length) {
 		btrfs_dev_set_zone_empty(device, physical);
+		btrfs_dev_clear_active_zone(device, physical);
 		physical += device->zone_info->zone_size;
 		length -= device->zone_info->zone_size;
 	}
@@ -1656,3 +1692,160 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 
 	return device;
 }
+
+/**
+ * Activate block group and underlying device zones
+ *
+ * @block_group: the block group to activate
+ *
+ * Return: true on success, false otherwise
+ */
+bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+{
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	struct map_lookup *map;
+	struct btrfs_device *device;
+	u64 physical;
+	bool ret;
+
+	if (!btrfs_is_zoned(block_group->fs_info))
+		return true;
+
+	map = block_group->physical_map;
+	/* Currently support SINGLE profile only */
+	ASSERT(map->num_stripes == 1);
+	device = map->stripes[0].dev;
+	physical = map->stripes[0].physical;
+
+	if (device->zone_info->max_active_zones == 0)
+		return true;
+
+	spin_lock(&block_group->lock);
+
+	if (block_group->zone_is_active) {
+		ret = true;
+		goto out_unlock;
+	}
+
+	/* No space left */
+	if (block_group->alloc_offset == block_group->zone_capacity) {
+		ret = false;
+		goto out_unlock;
+	}
+
+	if (!btrfs_dev_set_active_zone(device, physical)) {
+		/* Cannot activate the zone */
+		ret = false;
+		goto out_unlock;
+	}
+
+	/* Successfully activated all the zones */
+	block_group->zone_is_active = 1;
+
+	spin_unlock(&block_group->lock);
+
+	/* For the active block group list */
+	btrfs_get_block_group(block_group);
+
+	spin_lock(&fs_info->zone_active_bgs_lock);
+	ASSERT(list_empty(&block_group->active_bg_list));
+	list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
+	spin_unlock(&fs_info->zone_active_bgs_lock);
+
+	return true;
+
+out_unlock:
+	spin_unlock(&block_group->lock);
+	return ret;
+}
+
+int btrfs_zone_finish(struct btrfs_block_group *block_group)
+{
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	struct map_lookup *map;
+	struct btrfs_device *device;
+	u64 physical;
+	int ret = 0;
+
+	if (!btrfs_is_zoned(fs_info))
+		return 0;
+
+	map = block_group->physical_map;
+	/* Currently support SINGLE profile only */
+	ASSERT(map->num_stripes == 1);
+
+	device = map->stripes[0].dev;
+	physical = map->stripes[0].physical;
+
+	if (device->zone_info->max_active_zones == 0)
+		return 0;
+
+	spin_lock(&block_group->lock);
+	if (!block_group->zone_is_active) {
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+
+	/* Check if we have unwritten allocated space */
+	if ((block_group->flags &
+	     (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
+	    block_group->alloc_offset > block_group->meta_write_pointer) {
+		spin_unlock(&block_group->lock);
+		return -EAGAIN;
+	}
+	spin_unlock(&block_group->lock);
+
+	ret = btrfs_inc_block_group_ro(block_group, false);
+	if (ret)
+		return ret;
+
+	/* Ensure all writes in this block group finish */
+	btrfs_wait_block_group_reservations(block_group);
+	/* No need to wait for NOCOW writers. Zoned mode does not allow that. */
+	btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
+				 block_group->length);
+
+	spin_lock(&block_group->lock);
+
+	/*
+	 * Bail out if someone already deactivated the block group, or
+	 * allocated space is left in the block group.
+	 */
+	if (!block_group->zone_is_active) {
+		spin_unlock(&block_group->lock);
+		btrfs_dec_block_group_ro(block_group);
+		return 0;
+	}
+
+	if (block_group->reserved) {
+		spin_unlock(&block_group->lock);
+		btrfs_dec_block_group_ro(block_group);
+		return -EAGAIN;
+	}
+
+	block_group->zone_is_active = 0;
+	block_group->alloc_offset = block_group->zone_capacity;
+	block_group->free_space_ctl->free_space = 0;
+	btrfs_clear_treelog_bg(block_group);
+	spin_unlock(&block_group->lock);
+
+	ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+			       physical >> SECTOR_SHIFT,
+			       device->zone_info->zone_size >> SECTOR_SHIFT,
+			       GFP_NOFS);
+	btrfs_dec_block_group_ro(block_group);
+
+	if (!ret) {
+		btrfs_dev_clear_active_zone(device, physical);
+
+		spin_lock(&fs_info->zone_active_bgs_lock);
+		ASSERT(!list_empty(&block_group->active_bg_list));
+		list_del_init(&block_group->active_bg_list);
+		spin_unlock(&fs_info->zone_active_bgs_lock);
+
+		/* For active_bg_list */
+		btrfs_put_block_group(block_group);
+	}
+
+	return ret;
+}
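For reference, REQ_OP_ZONE_FINISH issued above through blkdev_zone_mgmt() is the same zone management operation userspace can trigger with the BLKFINISHZONE ioctl. A small standalone sketch (the device path and sector range are command-line arguments; nothing here is btrfs-specific):

/* Finish a zone range by hand, the userspace analogue of what
 * btrfs_zone_finish() does in the kernel. Requires a zoned block device
 * and a kernel with BLKFINISHZONE (v5.5+). */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>

int main(int argc, char **argv)
{
	struct blk_zone_range range;
	int fd;

	if (argc != 4) {
		fprintf(stderr, "usage: %s <dev> <start_sector> <nr_sectors>\n",
			argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	range.sector = strtoull(argv[2], NULL, 0);
	range.nr_sectors = strtoull(argv[3], NULL, 0);
	/* Transitions the zone(s) to FULL, releasing active-zone resources. */
	if (ioctl(fd, BLKFINISHZONE, &range) < 0)
		perror("BLKFINISHZONE");
	return 0;
}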
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
@@ -69,6 +69,8 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
 			      u64 physical_start, u64 physical_pos);
 struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 					    u64 logical, u64 length);
+bool btrfs_zone_activate(struct btrfs_block_group *block_group);
+int btrfs_zone_finish(struct btrfs_block_group *block_group);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -204,6 +206,16 @@ static inline struct btrfs_device *btrfs_zoned_get_device(
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+{
+	return true;
+}
+
+static inline int btrfs_zone_finish(struct btrfs_block_group *block_group)
+{
+	return 0;
+}
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
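The !CONFIG_BLK_DEV_ZONED stubs above keep callers free of #ifdefs: on regular devices activation trivially "succeeds" and finishing is a no-op. The same compile-time stub idiom in self-contained form (hypothetical FEATURE_X, not btrfs code):

#include <stdbool.h>
#include <stdio.h>

#ifdef FEATURE_X
static inline bool feature_activate(int id)
{
	printf("activating %d\n", id);	/* real work would happen here */
	return true;
}
#else
/* Compiled-out build: succeed unconditionally, just as
 * btrfs_zone_activate() does without CONFIG_BLK_DEV_ZONED. */
static inline bool feature_activate(int id)
{
	(void)id;
	return true;
}
#endif

int main(void)
{
	/* Callers need no #ifdef of their own; the stub absorbs it. */
	if (feature_activate(42))
		printf("ok\n");
	return 0;
}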