Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
  md/raid5: Allow dirty-degraded arrays to be assembled when only parity is degraded.
  Don't unconditionally set in_sync on newly added device in raid5_reshape
  md: allow v0.91 metadata to record devices as being active but not in-sync.
  md: factor out updating of 'recovery_offset'.
Commit 0e70613b1c
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -944,6 +944,14 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                     desc->raid_disk < mddev->raid_disks */) {
                        set_bit(In_sync, &rdev->flags);
                        rdev->raid_disk = desc->raid_disk;
+               } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
+                       /* active but not in sync implies recovery up to
+                        * reshape position.  We don't know exactly where
+                        * that is, so set to zero for now */
+                       if (mddev->minor_version >= 91) {
+                               rdev->recovery_offset = 0;
+                               rdev->raid_disk = desc->raid_disk;
+                       }
                }
                if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
                        set_bit(WriteMostly, &rdev->flags);
@@ -1032,8 +1040,19 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        list_for_each_entry(rdev2, &mddev->disks, same_set) {
                mdp_disk_t *d;
                int desc_nr;
-               if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
-                   && !test_bit(Faulty, &rdev2->flags))
+               int is_active = test_bit(In_sync, &rdev2->flags);
+
+               if (rdev2->raid_disk >= 0 &&
+                   sb->minor_version >= 91)
+                       /* we have nowhere to store the recovery_offset,
+                        * but if it is not below the reshape_position,
+                        * we can piggy-back on that.
+                        */
+                       is_active = 1;
+               if (rdev2->raid_disk < 0 ||
+                   test_bit(Faulty, &rdev2->flags))
+                       is_active = 0;
+               if (is_active)
                        desc_nr = rdev2->raid_disk;
                else
                        desc_nr = next_spare++;
@@ -1043,15 +1062,15 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
                d->number = rdev2->desc_nr;
                d->major = MAJOR(rdev2->bdev->bd_dev);
                d->minor = MINOR(rdev2->bdev->bd_dev);
-               if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
-                   && !test_bit(Faulty, &rdev2->flags))
+               if (is_active)
                        d->raid_disk = rdev2->raid_disk;
                else
                        d->raid_disk = rdev2->desc_nr; /* compatibility */
                if (test_bit(Faulty, &rdev2->flags))
                        d->state = (1<<MD_DISK_FAULTY);
-               else if (test_bit(In_sync, &rdev2->flags)) {
+               else if (is_active) {
                        d->state = (1<<MD_DISK_ACTIVE);
-                       d->state |= (1<<MD_DISK_SYNC);
+                       if (test_bit(In_sync, &rdev2->flags))
+                               d->state |= (1<<MD_DISK_SYNC);
                        active++;
                        working++;
@@ -1382,8 +1401,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
        if (rdev->raid_disk >= 0 &&
            !test_bit(In_sync, &rdev->flags)) {
-               if (mddev->curr_resync_completed > rdev->recovery_offset)
-                       rdev->recovery_offset = mddev->curr_resync_completed;
                if (rdev->recovery_offset > 0) {
                        sb->feature_map |=
                                cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
@@ -1917,6 +1934,14 @@ static void sync_sbs(mddev_t * mddev, int nospares)
         */
        mdk_rdev_t *rdev;
 
+       /* First make sure individual recovery_offsets are correct */
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
+               if (rdev->raid_disk >= 0 &&
+                   !test_bit(In_sync, &rdev->flags) &&
+                   mddev->curr_resync_completed > rdev->recovery_offset)
+                       rdev->recovery_offset = mddev->curr_resync_completed;
+
+       }
        list_for_each_entry(rdev, &mddev->disks, same_set) {
                if (rdev->sb_events == mddev->events ||
                    (nospares &&
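
Taken together, the md.c hunks above establish one convention: in a v0.91 superblock a device that is still recovering is written with MD_DISK_ACTIVE set but MD_DISK_SYNC clear, and super_90_validate maps that combination back to "active, recovery_offset = 0". The bit positions below match include/linux/raid/md_p.h; the helper and the encoding round trip are an illustrative user-space sketch, not kernel code.

#include <stdio.h>

/* Bit positions as in include/linux/raid/md_p.h */
#define MD_DISK_FAULTY 0
#define MD_DISK_ACTIVE 1
#define MD_DISK_SYNC   2

/* Illustrative stand-in for how super_90_sync now builds the
 * per-device state word for v0.91 metadata. */
static int encode_state(int faulty, int is_active, int in_sync)
{
        int state = 0;

        if (faulty)
                return 1 << MD_DISK_FAULTY;
        if (is_active) {
                state |= 1 << MD_DISK_ACTIVE;
                if (in_sync)
                        state |= 1 << MD_DISK_SYNC;  /* fully recovered */
        }
        return state;
}

int main(void)
{
        /* Device still recovering: active but not in sync. */
        int state = encode_state(0, 1, 0);

        if (state & (1 << MD_DISK_SYNC))
                printf("in sync: no recovery needed\n");
        else if (state & (1 << MD_DISK_ACTIVE))
                /* what super_90_validate now does for minor_version >= 91 */
                printf("active, not in sync: recovery_offset = 0\n");
        else
                printf("spare or faulty\n");
        return 0;
}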
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4823,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
        return ERR_PTR(-ENOMEM);
 }
 
+
+static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
+{
+       switch (algo) {
+       case ALGORITHM_PARITY_0:
+               if (raid_disk < max_degraded)
+                       return 1;
+               break;
+       case ALGORITHM_PARITY_N:
+               if (raid_disk >= raid_disks - max_degraded)
+                       return 1;
+               break;
+       case ALGORITHM_PARITY_0_6:
+               if (raid_disk == 0 ||
+                   raid_disk == raid_disks - 1)
+                       return 1;
+               break;
+       case ALGORITHM_LEFT_ASYMMETRIC_6:
+       case ALGORITHM_RIGHT_ASYMMETRIC_6:
+       case ALGORITHM_LEFT_SYMMETRIC_6:
+       case ALGORITHM_RIGHT_SYMMETRIC_6:
+               if (raid_disk == raid_disks - 1)
+                       return 1;
+       }
+       return 0;
+}
+
 static int run(mddev_t *mddev)
 {
        raid5_conf_t *conf;
        int working_disks = 0, chunk_size;
+       int dirty_parity_disks = 0;
        mdk_rdev_t *rdev;
+       sector_t reshape_offset = 0;
 
        if (mddev->recovery_cp != MaxSector)
                printk(KERN_NOTICE "raid5: %s is not clean"
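
For context, only_parity() answers a single question: for the given layout, does this slot ever hold data, or only parity? Only layouts that pin parity to fixed slots can answer "parity only". The sketch below restates that logic for a subset of layouts in plain user-space C; the enum values are illustrative stand-ins rather than the kernel's ALGORITHM_* constants, and the sample array is made up.

#include <stdio.h>

/* Illustrative stand-ins for the kernel's ALGORITHM_* constants (raid5.h). */
enum { PARITY_0, PARITY_N, PARITY_0_6, LEFT_SYMMETRIC_6 };

/* Same decision as the only_parity() added above, for a subset of layouts. */
static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
{
        switch (algo) {
        case PARITY_0:          /* parity occupies the first max_degraded slots */
                return raid_disk < max_degraded;
        case PARITY_N:          /* parity occupies the last max_degraded slots */
                return raid_disk >= raid_disks - max_degraded;
        case PARITY_0_6:        /* P on slot 0, Q on the last slot */
                return raid_disk == 0 || raid_disk == raid_disks - 1;
        case LEFT_SYMMETRIC_6:  /* RAID5 layout plus a fixed Q on the last slot */
                return raid_disk == raid_disks - 1;
        }
        return 0;               /* rotating-parity layouts: every slot holds data */
}

int main(void)
{
        /* 6-device RAID6 with PARITY_N: slots 4 and 5 hold only parity. */
        for (int d = 0; d < 6; d++)
                printf("disk %d: %s\n", d,
                       only_parity(d, PARITY_N, 6, 2) ? "parity only" : "holds data");
        return 0;
}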
@@ -4861,6 +4890,7 @@ static int run(mddev_t *mddev)
                               "on a stripe boundary\n");
                        return -EINVAL;
                }
+               reshape_offset = here_new * mddev->new_chunk_sectors;
                /* here_new is the stripe we will write to */
                here_old = mddev->reshape_position;
                sector_div(here_old, mddev->chunk_sectors *
@@ -4916,10 +4946,51 @@ static int run(mddev_t *mddev)
        /*
         * 0 for a fully functional array, 1 or 2 for a degraded array.
         */
-       list_for_each_entry(rdev, &mddev->disks, same_set)
-               if (rdev->raid_disk >= 0 &&
-                   test_bit(In_sync, &rdev->flags))
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
+               if (rdev->raid_disk < 0)
+                       continue;
+               if (test_bit(In_sync, &rdev->flags))
                        working_disks++;
+               /* This disc is not fully in-sync.  However if it
+                * just stored parity (beyond the recovery_offset),
+                * then we don't need to be concerned about the
+                * array being dirty.
+                * When reshape goes 'backwards', we never have
+                * partially completed devices, so we only need
+                * to worry about reshape going forwards.
+                */
+               /* Hack because v0.91 doesn't store recovery_offset properly. */
+               if (mddev->major_version == 0 &&
+                   mddev->minor_version > 90)
+                       rdev->recovery_offset = reshape_offset;
+
+               printk("%d: w=%d pa=%d pr=%d m=%d a=%d r=%d op1=%d op2=%d\n",
+                      rdev->raid_disk, working_disks, conf->prev_algo,
+                      conf->previous_raid_disks, conf->max_degraded,
+                      conf->algorithm, conf->raid_disks,
+                      only_parity(rdev->raid_disk,
+                                  conf->prev_algo,
+                                  conf->previous_raid_disks,
+                                  conf->max_degraded),
+                      only_parity(rdev->raid_disk,
+                                  conf->algorithm,
+                                  conf->raid_disks,
+                                  conf->max_degraded));
+               if (rdev->recovery_offset < reshape_offset) {
+                       /* We need to check old and new layout */
+                       if (!only_parity(rdev->raid_disk,
+                                        conf->algorithm,
+                                        conf->raid_disks,
+                                        conf->max_degraded))
+                               continue;
+               }
+               if (!only_parity(rdev->raid_disk,
+                                conf->prev_algo,
+                                conf->previous_raid_disks,
+                                conf->max_degraded))
+                       continue;
+               dirty_parity_disks++;
+       }
 
        mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
                           - working_disks);
@@ -4935,7 +5006,7 @@ static int run(mddev_t *mddev)
        mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
        mddev->resync_max_sectors = mddev->dev_sectors;
 
-       if (mddev->degraded > 0 &&
+       if (mddev->degraded > dirty_parity_disks &&
            mddev->recovery_cp != MaxSector) {
                if (mddev->ok_start_degraded)
                        printk(KERN_WARNING
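
The practical effect of the run() changes: a dirty array may now start degraded as long as every missing or not-yet-recovered device is accounted for by dirty_parity_disks; only mddev->degraded > dirty_parity_disks (with recovery_cp not at MaxSector) is still refused unless start_dirty_degraded is set. A hedged sketch of that decision with made-up numbers follows; the struct and helper are illustrative, only the field names mirror the kernel.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative subset of the state run() inspects. */
struct array_state {
        int degraded;             /* missing or not-in-sync devices */
        int dirty_parity_disks;   /* not-in-sync devices that only hold parity */
        bool clean;               /* recovery_cp == MaxSector */
        bool ok_start_degraded;   /* start_dirty_degraded override */
};

static bool may_start(const struct array_state *s)
{
        /* Old check: refuse any dirty degraded array (degraded > 0).
         * New check: refuse only if some degraded device could hold
         * stale data, i.e. degraded > dirty_parity_disks. */
        if (s->degraded > s->dirty_parity_disks && !s->clean)
                return s->ok_start_degraded;   /* only with the override */
        return true;
}

int main(void)
{
        /* Dirty RAID6 mid-reshape: both not-in-sync devices held only parity,
         * so the new check allows assembly where the old one refused. */
        struct array_state st = { .degraded = 2, .dirty_parity_disks = 2,
                                  .clean = false, .ok_start_degraded = false };
        printf("assemble: %s\n", may_start(&st) ? "yes" : "no");
        return 0;
}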
@@ -5361,9 +5432,11 @@ static int raid5_start_reshape(mddev_t *mddev)
                    !test_bit(Faulty, &rdev->flags)) {
                        if (raid5_add_disk(mddev, rdev) == 0) {
                                char nm[20];
-                               set_bit(In_sync, &rdev->flags);
-                               added_devices++;
-                               rdev->recovery_offset = 0;
+                               if (rdev->raid_disk >= conf->previous_raid_disks)
+                                       set_bit(In_sync, &rdev->flags);
+                               else
+                                       rdev->recovery_offset = 0;
+                               added_devices++;
                                sprintf(nm, "rd%d", rdev->raid_disk);
                                if (sysfs_create_link(&mddev->kobj,
                                                      &rdev->kobj, nm))
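
Finally, the raid5_start_reshape hunk changes which newly added devices are marked In_sync immediately: only devices placed in slots beyond the old geometry (raid_disk >= conf->previous_raid_disks) are genuinely empty, while a device filling a slot of the old geometry must be recovered from offset 0. A minimal sketch of that rule, with an invented helper name and example slot numbers:

#include <stdio.h>

/* Returns 1 if a device added during reshape can be marked In_sync at once,
 * 0 if it must start recovery from offset 0 (illustrative helper). */
static int in_sync_on_add(int raid_disk, int previous_raid_disks)
{
        return raid_disk >= previous_raid_disks;
}

int main(void)
{
        /* Growing a 4-device array to 6: slots 4 and 5 are new,
         * slot 2 replaces a failed device of the old geometry. */
        int slots[] = { 2, 4, 5 };
        for (int i = 0; i < 3; i++)
                printf("slot %d: %s\n", slots[i],
                       in_sync_on_add(slots[i], 4) ? "In_sync" : "recovery_offset = 0");
        return 0;
}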