From 2446dba03f9dabe0b477a126cbeb377854785b47 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 31 Jul 2014 10:16:29 +1000 Subject: [PATCH 1/5] md/raid1,raid10: always abort recover on write error. Currently we don't abort recovery on a write error if the write error to the recovering device was triggerd by normal IO (as opposed to recovery IO). This means that for one bitmap region, the recovery might write to the recovering device for a few sectors, then not bother for subsequent sectors (as it never writes to failed devices). In this case the bitmap bit will be cleared, but it really shouldn't. The result is that if the recovering device fails and is then re-added (after fixing whatever hardware problem triggerred the failure), the second recovery won't redo the region it was in the middle of, so some of the device will not be recovered properly. If we abort the recovery, the region being processes will be cancelled (bit not cleared) and the whole region will be retried. As the bug can result in data corruption the patch is suitable for -stable. For kernels prior to 3.11 there is a conflict in raid10.c which will require care. Original-from: jiao hui Reported-and-tested-by: jiao hui Signed-off-by: NeilBrown Cc: stable@vger.kernel.org --- drivers/md/raid1.c | 8 ++++---- drivers/md/raid10.c | 11 +++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 56e24c072b62..d7690f86fdb9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1501,12 +1501,12 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) mddev->degraded++; set_bit(Faulty, &rdev->flags); spin_unlock_irqrestore(&conf->device_lock, flags); - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); } else set_bit(Faulty, &rdev->flags); + /* + * if recovery is running, make sure it aborts. + */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n" diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index cb882aae9e20..b08c18871323 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1684,13 +1684,12 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) spin_unlock_irqrestore(&conf->device_lock, flags); return; } - if (test_and_clear_bit(In_sync, &rdev->flags)) { + if (test_and_clear_bit(In_sync, &rdev->flags)) mddev->degraded++; - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - } + /* + * If recovery is running, make sure it aborts. + */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); From af5628f05db62c656f994b2346897939b5110d6a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 31 Jul 2014 13:54:54 +1000 Subject: [PATCH 2/5] md: disable probing for md devices 512 and over. The way md devices are traditionally created in the kernel is simply to open the device with the desired major/minor number. This can be problematic as some support tools, notably udev and programs run by udev, can open a device just to see what is there, and find that it has created something. It is easy for a race to cause udev to open an md device just after it was destroy, causing it to suddenly re-appear. For some time we have had an alternate way to create md devices echo md_somename > /sys/modules/md_mod/paramaters/new_array This will always use a minor number of 512 or higher, which mdadm normally avoids. Using this makes the creation-by-opening unnecessary, but does not disable it, so it is still there to cause problems. This patch disable probing for devices with a major of 9 (MD_MAJOR) and a minor of 512 and up. This devices created by writing to new_array cannot be re-created by opening the node in /dev. Signed-off-by: NeilBrown --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 32fc19c540d4..1379b1a3b9ff 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8592,7 +8592,7 @@ static int __init md_init(void) goto err_mdp; mdp_major = ret; - blk_register_region(MKDEV(MD_MAJOR, 0), 1UL< Date: Thu, 7 Aug 2014 09:37:41 -0400 Subject: [PATCH 3/5] md: Recovery speed is wrong When we calculate the speed of recovery, the numerator that contains the recovery done sectors. It's need to subtract the sectors which don't finish recovery. Signed-off-by: Xiao Ni Signed-off-by: NeilBrown --- drivers/md/md.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1379b1a3b9ff..eb8c93dff7c6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7376,7 +7376,7 @@ void md_do_sync(struct md_thread *thread) struct mddev *mddev2; unsigned int currspeed = 0, window; - sector_t max_sectors,j, io_sectors; + sector_t max_sectors,j, io_sectors, recovery_done; unsigned long mark[SYNC_MARKS]; unsigned long update_time; sector_t mark_cnt[SYNC_MARKS]; @@ -7652,7 +7652,8 @@ void md_do_sync(struct md_thread *thread) */ cond_resched(); - currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 + recovery_done = io_sectors - atomic_read(&mddev->recovery_active); + currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2 /((jiffies-mddev->resync_mark)/HZ +1) +1; if (currspeed > speed_min(mddev)) { From a8461a61c241a25afedbe493c13d98a6e0cf4246 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 6 Aug 2014 16:34:27 +1000 Subject: [PATCH 4/5] md/raid0: check for bitmap compatability when changing raid levels. If an array has a bitmap, then it cannot be converted to raid0. Reported-by: Xiao Ni Signed-off-by: NeilBrown --- drivers/md/raid0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 407a99e46f69..cf91f5910c7c 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -685,6 +685,12 @@ static void *raid0_takeover(struct mddev *mddev) * raid10 - assuming we have all necessary active disks * raid1 - with (N -1) mirror drives faulty */ + + if (mddev->bitmap) { + printk(KERN_ERR "md/raid0: %s: cannot takeover array with bitmap\n", + mdname(mddev)); + return ERR_PTR(-EBUSY); + } if (mddev->level == 4) return raid0_takeover_raid45(mddev); From d66b1b395a59027a1c054e1fc57d582cb4194491 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 8 Aug 2014 15:40:24 +1000 Subject: [PATCH 5/5] md: don't allow bitmap file to be added to raid0/linear. An array can only accept a bitmap if it will call bitmap_daemon_work periodically, which means it needs a thread running. If there is no thread, don't allow a bitmap to be added. Signed-off-by: NeilBrown --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index eb8c93dff7c6..1294238610df 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5961,7 +5961,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd) int err = 0; if (mddev->pers) { - if (!mddev->pers->quiesce) + if (!mddev->pers->quiesce || !mddev->thread) return -EBUSY; if (mddev->recovery || mddev->sync_thread) return -EBUSY; @@ -6263,7 +6263,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) rv = update_raid_disks(mddev, info->raid_disks); if ((state ^ info->state) & (1<pers->quiesce == NULL) + if (mddev->pers->quiesce == NULL || mddev->thread == NULL) return -EINVAL; if (mddev->recovery || mddev->sync_thread) return -EBUSY;