dm clone metadata: Track exact changes per transaction
Extend struct dirty_map with a second bitmap which tracks the exact
regions that were hydrated during the current metadata transaction.
Moreover, fix __flush_dmap() to only commit the metadata of the regions
that were hydrated during the current transaction.
This is required by the following commits to fix a data corruption bug.
Fixes: 7431b7835f
("dm: add clone target")
Cc: stable@vger.kernel.org # v5.4+
Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
Родитель
474e559567
Коммит
e6a505f3f9
|
@ -67,23 +67,34 @@ struct superblock_disk {
|
|||
* To save constantly doing look ups on disk we keep an in core copy of the
|
||||
* on-disk bitmap, the region_map.
|
||||
*
|
||||
* To further reduce metadata I/O overhead we use a second bitmap, the dmap
|
||||
* (dirty bitmap), which tracks the dirty words, i.e. longs, of the region_map.
|
||||
* In order to track which regions are hydrated during a metadata transaction,
|
||||
* we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
|
||||
* bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
|
||||
* tracks the regions that got hydrated during the current metadata
|
||||
* transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
|
||||
* the dirty_regions bitmap.
|
||||
*
|
||||
* This allows us to precisely track the regions that were hydrated during the
|
||||
* current metadata transaction and update the metadata accordingly, when we
|
||||
* commit the current transaction. This is important because dm-clone should
|
||||
* only commit the metadata of regions that were properly flushed to the
|
||||
* destination device beforehand. Otherwise, in case of a crash, we could end
|
||||
* up with a corrupted dm-clone device.
|
||||
*
|
||||
* When a region finishes hydrating dm-clone calls
|
||||
* dm_clone_set_region_hydrated(), or for discard requests
|
||||
* dm_clone_cond_set_range(), which sets the corresponding bits in region_map
|
||||
* and dmap.
|
||||
*
|
||||
* During a metadata commit we scan the dmap for dirty region_map words (longs)
|
||||
* and update accordingly the on-disk metadata. Thus, we don't have to flush to
|
||||
* disk the whole region_map. We can just flush the dirty region_map words.
|
||||
* During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
|
||||
* and update the on-disk metadata accordingly. Thus, we don't have to flush to
|
||||
* disk the whole region_map. We can just flush the dirty region_map bits.
|
||||
*
|
||||
* We use a dirty bitmap, which is smaller than the original region_map, to
|
||||
* reduce the amount of memory accesses during a metadata commit. As dm-bitset
|
||||
* accesses the on-disk bitmap in 64-bit word granularity, there is no
|
||||
* significant benefit in tracking the dirty region_map bits with a smaller
|
||||
* granularity.
|
||||
* We use the helper dmap->dirty_words bitmap, which is smaller than the
|
||||
* original region_map, to reduce the amount of memory accesses during a
|
||||
* metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
|
||||
* 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
|
||||
* accesses.
|
||||
*
|
||||
* We could update directly the on-disk bitmap, when dm-clone calls either
|
||||
* dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), buts this
|
||||
|
@ -92,12 +103,13 @@ struct superblock_disk {
|
|||
* e.g., in a hooked overwrite bio's completion routine, and further reduce the
|
||||
* I/O completion latency.
|
||||
*
|
||||
* We maintain two dirty bitmaps. During a metadata commit we atomically swap
|
||||
* the currently used dmap with the unused one. This allows the metadata update
|
||||
* functions to run concurrently with an ongoing commit.
|
||||
* We maintain two dirty bitmap sets. During a metadata commit we atomically
|
||||
* swap the currently used dmap with the unused one. This allows the metadata
|
||||
* update functions to run concurrently with an ongoing commit.
|
||||
*/
|
||||
struct dirty_map {
|
||||
unsigned long *dirty_words;
|
||||
unsigned long *dirty_regions;
|
||||
unsigned int changed;
|
||||
};
|
||||
|
||||
|
@ -461,22 +473,40 @@ static size_t bitmap_size(unsigned long nr_bits)
|
|||
return BITS_TO_LONGS(nr_bits) * sizeof(long);
|
||||
}
|
||||
|
||||
static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
|
||||
unsigned long nr_regions)
|
||||
{
|
||||
dmap->changed = 0;
|
||||
|
||||
dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
|
||||
if (!dmap->dirty_words)
|
||||
return -ENOMEM;
|
||||
|
||||
dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
|
||||
if (!dmap->dirty_regions) {
|
||||
kvfree(dmap->dirty_words);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __dirty_map_exit(struct dirty_map *dmap)
|
||||
{
|
||||
kvfree(dmap->dirty_words);
|
||||
kvfree(dmap->dirty_regions);
|
||||
}
|
||||
|
||||
static int dirty_map_init(struct dm_clone_metadata *cmd)
|
||||
{
|
||||
cmd->dmap[0].changed = 0;
|
||||
cmd->dmap[0].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL);
|
||||
|
||||
if (!cmd->dmap[0].dirty_words) {
|
||||
if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
|
||||
DMERR("Failed to allocate dirty bitmap");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
cmd->dmap[1].changed = 0;
|
||||
cmd->dmap[1].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL);
|
||||
|
||||
if (!cmd->dmap[1].dirty_words) {
|
||||
if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
|
||||
DMERR("Failed to allocate dirty bitmap");
|
||||
kvfree(cmd->dmap[0].dirty_words);
|
||||
__dirty_map_exit(&cmd->dmap[0]);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
|
@ -487,8 +517,8 @@ static int dirty_map_init(struct dm_clone_metadata *cmd)
|
|||
|
||||
static void dirty_map_exit(struct dm_clone_metadata *cmd)
|
||||
{
|
||||
kvfree(cmd->dmap[0].dirty_words);
|
||||
kvfree(cmd->dmap[1].dirty_words);
|
||||
__dirty_map_exit(&cmd->dmap[0]);
|
||||
__dirty_map_exit(&cmd->dmap[1]);
|
||||
}
|
||||
|
||||
static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
|
||||
|
@ -633,21 +663,23 @@ unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd
|
|||
return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
|
||||
}
|
||||
|
||||
static int __update_metadata_word(struct dm_clone_metadata *cmd, unsigned long word)
|
||||
static int __update_metadata_word(struct dm_clone_metadata *cmd,
|
||||
unsigned long *dirty_regions,
|
||||
unsigned long word)
|
||||
{
|
||||
int r;
|
||||
unsigned long index = word * BITS_PER_LONG;
|
||||
unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
|
||||
|
||||
while (index < max_index) {
|
||||
if (test_bit(index, cmd->region_map)) {
|
||||
if (test_bit(index, dirty_regions)) {
|
||||
r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
|
||||
index, &cmd->bitset_root);
|
||||
|
||||
if (r) {
|
||||
DMERR("dm_bitset_set_bit failed");
|
||||
return r;
|
||||
}
|
||||
__clear_bit(index, dirty_regions);
|
||||
}
|
||||
index++;
|
||||
}
|
||||
|
@ -721,7 +753,7 @@ static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
|
|||
if (word == cmd->nr_words)
|
||||
break;
|
||||
|
||||
r = __update_metadata_word(cmd, word);
|
||||
r = __update_metadata_word(cmd, dmap->dirty_regions, word);
|
||||
|
||||
if (r)
|
||||
return r;
|
||||
|
@ -802,6 +834,7 @@ int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long re
|
|||
dmap = cmd->current_dmap;
|
||||
|
||||
__set_bit(word, dmap->dirty_words);
|
||||
__set_bit(region_nr, dmap->dirty_regions);
|
||||
__set_bit(region_nr, cmd->region_map);
|
||||
dmap->changed = 1;
|
||||
|
||||
|
@ -830,6 +863,7 @@ int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
|
|||
if (!test_bit(region_nr, cmd->region_map)) {
|
||||
word = region_nr / BITS_PER_LONG;
|
||||
__set_bit(word, dmap->dirty_words);
|
||||
__set_bit(region_nr, dmap->dirty_regions);
|
||||
__set_bit(region_nr, cmd->region_map);
|
||||
dmap->changed = 1;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче