From 2a708cff93f1845b9239bc7d6310aef54e716c6a Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Thu, 1 Oct 2015 08:31:51 +0000 Subject: [PATCH 1/5] dm: fix AB-BA deadlock in __dm_destroy() __dm_destroy() takes io_barrier SRCU lock (dm_get_live_table) and suspend_lock in reverse order. Doing so can cause AB-BA deadlock: __dm_destroy dm_swap_table --------------------------------------------------- mutex_lock(suspend_lock) dm_get_live_table() srcu_read_lock(io_barrier) dm_sync_table() synchronize_srcu(io_barrier) .. waiting for dm_put_live_table() mutex_lock(suspend_lock) .. waiting for suspend_lock Fix this by taking the locks in proper order. Signed-off-by: Jun'ichi Nomura Fixes: ab7c7bb6f4ab ("dm: hold suspend_lock while suspending device during device deletion") Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6264781dc69a..7289ece3b560 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2837,8 +2837,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait) might_sleep(); - map = dm_get_live_table(md, &srcu_idx); - spin_lock(&_minor_lock); idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); @@ -2852,14 +2850,14 @@ static void __dm_destroy(struct mapped_device *md, bool wait) * do not race with internal suspend. */ mutex_lock(&md->suspend_lock); + map = dm_get_live_table(md, &srcu_idx); if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); } - mutex_unlock(&md->suspend_lock); - /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ dm_put_live_table(md, srcu_idx); + mutex_unlock(&md->suspend_lock); /* * Rare, but there may be I/O requests still going to complete, From 042745ee53a0a7c1f5aff191a4a24213c6dcfb52 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 2 Oct 2015 11:17:37 -0400 Subject: [PATCH 2/5] dm raid: fix round up of default region size Commit 3a0f9aaee028 ("dm raid: round region_size to power of two") intended to make sure that the default region size is a power of two. However, the logic in that commit is incorrect and sets the variable region_size to 0 or 1, depending on whether min_region_size is a power of two. Fix this logic, using roundup_pow_of_two(), so that region_size is properly rounded up to the next power of two. Signed-off-by: Mikulas Patocka Fixes: 3a0f9aaee028 ("dm raid: round region_size to power of two") Cc: stable@vger.kernel.org # v3.8+ Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 97e165183e79..a0901214aef5 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -329,8 +329,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) */ if (min_region_size > (1 << 13)) { /* If not a power of 2, make it the next power of 2 */ - if (min_region_size & (min_region_size - 1)) - region_size = 1 << fls(region_size); + region_size = roundup_pow_of_two(min_region_size); DMINFO("Choosing default region size of %lu sectors", region_size); } else { From 50887bd139b83ce4489ed865a04bf1be5559c4ad Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Tue, 6 Oct 2015 04:19:54 +0000 Subject: [PATCH 3/5] dm: fix request-based dm error reporting end_clone_bio() is a endio callback for clone bio and should check and save the clone's bi_error for error reporting. However, 4246a0b63bd8 ("block: add a bi_error field to struct bio") changed the function to check the original bio's bi_error, which is 0. Without this fix, clone's error is ignored and reported to the original request as success. Thus data corruption will be observed. Fixes: 4246a0b63bd8 ("block: add a bi_error field to struct bio") Signed-off-by: Jun'ichi Nomura Cc: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7289ece3b560..1b5c6047e4f1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1001,6 +1001,7 @@ static void end_clone_bio(struct bio *clone) struct dm_rq_target_io *tio = info->tio; struct bio *bio = info->orig; unsigned int nr_bytes = info->orig->bi_iter.bi_size; + int error = clone->bi_error; bio_put(clone); @@ -1011,13 +1012,13 @@ static void end_clone_bio(struct bio *clone) * the remainder. */ return; - else if (bio->bi_error) { + else if (error) { /* * Don't notice the error to the upper layer yet. * The error handling decision is made by the target driver, * when the request is completed. */ - tio->error = bio->bi_error; + tio->error = error; return; } From 2bffa1503c5c06192eb1459180fac4416575a966 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 9 Oct 2015 14:03:38 +0100 Subject: [PATCH 4/5] dm cache: fix NULL pointer when switching from cleaner policy The cleaner policy doesn't make use of the per cache block hint space in the metadata (unlike the other policies). When switching from the cleaner policy to mq or smq a NULL pointer crash (in dm_tm_new_block) was observed. The crash was caused by bugs in dm-cache-metadata.c when trying to skip creation of the hint btree. The minimal fix is to change hint size for the cleaner policy to 4 bytes (only hint size supported). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-cache-policy-cleaner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c index 240c9f0e85e7..8a096456579b 100644 --- a/drivers/md/dm-cache-policy-cleaner.c +++ b/drivers/md/dm-cache-policy-cleaner.c @@ -436,7 +436,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size, static struct dm_cache_policy_type wb_policy_type = { .name = "cleaner", .version = {1, 0, 0}, - .hint_size = 0, + .hint_size = 4, .owner = THIS_MODULE, .create = wb_create }; From b0d3cc011e532d8c9db76cf717bcafa53c135595 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 8 Oct 2015 18:05:41 -0400 Subject: [PATCH 5/5] dm snapshot: add new persistent store option to support overflow Commit 76c44f6d80 introduced the possibly for "Overflow" to be reported by the snapshot device's status. Older userspace (e.g. lvm2) does not handle the "Overflow" status response. Fix this incompatibility by requiring newer userspace code, that can cope with "Overflow", request the persistent store with overflow support by using "PO" (Persistent with Overflow) for the snapshot store type. Reported-by: Zdenek Kabelac Fixes: 76c44f6d80 ("dm snapshot: don't invalidate on-disk image on snapshot write overflow") Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- Documentation/device-mapper/snapshot.txt | 10 +++++++--- drivers/md/dm-exception-store.c | 6 +++--- drivers/md/dm-exception-store.h | 5 +++-- drivers/md/dm-snap-persistent.c | 17 ++++++++++++++--- drivers/md/dm-snap-transient.c | 3 +-- drivers/md/dm-snap.c | 14 +++++++++----- 6 files changed, 37 insertions(+), 18 deletions(-) diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt index 0d5bc46dc167..ad6949bff2e3 100644 --- a/Documentation/device-mapper/snapshot.txt +++ b/Documentation/device-mapper/snapshot.txt @@ -41,9 +41,13 @@ useless and be disabled, returning errors. So it is important to monitor the amount of free space and expand the before it fills up. is P (Persistent) or N (Not persistent - will not survive -after reboot). -The difference is that for transient snapshots less metadata must be -saved on disk - they can be kept in memory by the kernel. +after reboot). O (Overflow) can be added as a persistent store option +to allow userspace to advertise its support for seeing "Overflow" in the +snapshot status. So supported store types are "P", "PO" and "N". + +The difference between persistent and transient is with transient +snapshots less metadata must be saved on disk - they can be kept in +memory by the kernel. * snapshot-merge diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index ebaa4f803eec..192bb8beeb6b 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -203,7 +203,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, return -EINVAL; } - tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL); + tmp_store = kzalloc(sizeof(*tmp_store), GFP_KERNEL); if (!tmp_store) { ti->error = "Exception store allocation failed"; return -ENOMEM; @@ -215,7 +215,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, else if (persistent == 'N') type = get_type("N"); else { - ti->error = "Persistent flag is not P or N"; + ti->error = "Exception store type is not P or N"; r = -EINVAL; goto bad_type; } @@ -233,7 +233,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, if (r) goto bad; - r = type->ctr(tmp_store, 0, NULL); + r = type->ctr(tmp_store, (strlen(argv[0]) > 1 ? &argv[0][1] : NULL)); if (r) { ti->error = "Exception store type constructor failed"; goto bad; diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 0b2536247cf5..fae34e7a0b1e 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -42,8 +42,7 @@ struct dm_exception_store_type { const char *name; struct module *module; - int (*ctr) (struct dm_exception_store *store, - unsigned argc, char **argv); + int (*ctr) (struct dm_exception_store *store, char *options); /* * Destroys this object when you've finished with it. @@ -123,6 +122,8 @@ struct dm_exception_store { unsigned chunk_shift; void *context; + + bool userspace_supports_overflow; }; /* diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index bf71583296f7..aeacad9be51d 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -7,6 +7,7 @@ #include "dm-exception-store.h" +#include #include #include #include @@ -843,8 +844,7 @@ static void persistent_drop_snapshot(struct dm_exception_store *store) DMWARN("write header failed"); } -static int persistent_ctr(struct dm_exception_store *store, - unsigned argc, char **argv) +static int persistent_ctr(struct dm_exception_store *store, char *options) { struct pstore *ps; @@ -873,6 +873,16 @@ static int persistent_ctr(struct dm_exception_store *store, return -ENOMEM; } + if (options) { + char overflow = toupper(options[0]); + if (overflow == 'O') + store->userspace_supports_overflow = true; + else { + DMERR("Unsupported persistent store option: %s", options); + return -EINVAL; + } + } + store->context = ps; return 0; @@ -888,7 +898,8 @@ static unsigned persistent_status(struct dm_exception_store *store, case STATUSTYPE_INFO: break; case STATUSTYPE_TABLE: - DMEMIT(" P %llu", (unsigned long long)store->chunk_size); + DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P", + (unsigned long long)store->chunk_size); } return sz; diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 1ce9a2586e41..9b7c8c8049d6 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -70,8 +70,7 @@ static void transient_usage(struct dm_exception_store *store, *metadata_sectors = 0; } -static int transient_ctr(struct dm_exception_store *store, - unsigned argc, char **argv) +static int transient_ctr(struct dm_exception_store *store, char *options) { struct transient_c *tc; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index c0bcd6516dfe..c06b74e91cd6 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1098,7 +1098,7 @@ static void stop_merge(struct dm_snapshot *s) } /* - * Construct a snapshot mapping:

+ * Construct a snapshot mapping: */ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) { @@ -1302,6 +1302,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src, u.store_swap = snap_dest->store; snap_dest->store = snap_src->store; + snap_dest->store->userspace_supports_overflow = u.store_swap->userspace_supports_overflow; snap_src->store = u.store_swap; snap_dest->store->snap = snap_dest; @@ -1739,8 +1740,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) pe = __find_pending_exception(s, pe, chunk); if (!pe) { - s->snapshot_overflowed = 1; - DMERR("Snapshot overflowed: Unable to allocate exception."); + if (s->store->userspace_supports_overflow) { + s->snapshot_overflowed = 1; + DMERR("Snapshot overflowed: Unable to allocate exception."); + } else + __invalidate_snapshot(s, -ENOMEM); r = -EIO; goto out_unlock; } @@ -2365,7 +2369,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 14, 0}, + .version = {1, 15, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2379,7 +2383,7 @@ static struct target_type snapshot_target = { static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr,