md: support 'external' metadata for md arrays
- Add a state flag 'external' to indicate that the metadata is managed externally (by user-space) so important changes need to be left to user-space to handle. Alternates are non-persistent ('none') where there is no stable metadata - after the array is stopped there is no record of its status - and internal which can be version 0.90 or version 1.x. These are selected by writing to the 'metadata' attribute. - move the updating of superblocks (sync_sbs) to after we have checked if there are any superblocks or not. - New array state 'write_pending'. This means that the metadata records the array as 'clean', but a write has been requested, so the metadata has to be updated to record a 'dirty' array before the write can continue. This change is reported to md by writing 'active' to the array_state attribute. - tidy up marking of sb_dirty: - don't set sb_dirty when resync finishes as md_check_recovery calls md_update_sb when the sync thread finishes anyway. - Don't set sb_dirty in multipath_run as the array might not be dirty. - don't mark superblock dirty when switching to 'clean' if there is no internal superblock (if external, userspace can choose to update the superblock whenever it chooses to). Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Родитель
b47490c9bc
Коммит
e691063a61
|
@ -778,7 +778,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
mddev->major_version = 0;
|
mddev->major_version = 0;
|
||||||
mddev->minor_version = sb->minor_version;
|
mddev->minor_version = sb->minor_version;
|
||||||
mddev->patch_version = sb->patch_version;
|
mddev->patch_version = sb->patch_version;
|
||||||
mddev->persistent = ! sb->not_persistent;
|
mddev->persistent = 1;
|
||||||
|
mddev->external = 0;
|
||||||
mddev->chunk_size = sb->chunk_size;
|
mddev->chunk_size = sb->chunk_size;
|
||||||
mddev->ctime = sb->ctime;
|
mddev->ctime = sb->ctime;
|
||||||
mddev->utime = sb->utime;
|
mddev->utime = sb->utime;
|
||||||
|
@ -904,7 +905,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
sb->size = mddev->size;
|
sb->size = mddev->size;
|
||||||
sb->raid_disks = mddev->raid_disks;
|
sb->raid_disks = mddev->raid_disks;
|
||||||
sb->md_minor = mddev->md_minor;
|
sb->md_minor = mddev->md_minor;
|
||||||
sb->not_persistent = !mddev->persistent;
|
sb->not_persistent = 0;
|
||||||
sb->utime = mddev->utime;
|
sb->utime = mddev->utime;
|
||||||
sb->state = 0;
|
sb->state = 0;
|
||||||
sb->events_hi = (mddev->events>>32);
|
sb->events_hi = (mddev->events>>32);
|
||||||
|
@ -1158,6 +1159,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
mddev->major_version = 1;
|
mddev->major_version = 1;
|
||||||
mddev->patch_version = 0;
|
mddev->patch_version = 0;
|
||||||
mddev->persistent = 1;
|
mddev->persistent = 1;
|
||||||
|
mddev->external = 0;
|
||||||
mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
|
mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
|
||||||
mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
|
mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
|
||||||
mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
|
mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
|
||||||
|
@ -1696,18 +1698,20 @@ repeat:
|
||||||
MD_BUG();
|
MD_BUG();
|
||||||
mddev->events --;
|
mddev->events --;
|
||||||
}
|
}
|
||||||
sync_sbs(mddev, nospares);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* do not write anything to disk if using
|
* do not write anything to disk if using
|
||||||
* nonpersistent superblocks
|
* nonpersistent superblocks
|
||||||
*/
|
*/
|
||||||
if (!mddev->persistent) {
|
if (!mddev->persistent) {
|
||||||
clear_bit(MD_CHANGE_PENDING, &mddev->flags);
|
if (!mddev->external)
|
||||||
|
clear_bit(MD_CHANGE_PENDING, &mddev->flags);
|
||||||
|
|
||||||
spin_unlock_irq(&mddev->write_lock);
|
spin_unlock_irq(&mddev->write_lock);
|
||||||
wake_up(&mddev->sb_wait);
|
wake_up(&mddev->sb_wait);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
sync_sbs(mddev, nospares);
|
||||||
spin_unlock_irq(&mddev->write_lock);
|
spin_unlock_irq(&mddev->write_lock);
|
||||||
|
|
||||||
dprintk(KERN_INFO
|
dprintk(KERN_INFO
|
||||||
|
@ -2425,6 +2429,8 @@ array_state_show(mddev_t *mddev, char *page)
|
||||||
case 0:
|
case 0:
|
||||||
if (mddev->in_sync)
|
if (mddev->in_sync)
|
||||||
st = clean;
|
st = clean;
|
||||||
|
else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
|
||||||
|
st = write_pending;
|
||||||
else if (mddev->safemode)
|
else if (mddev->safemode)
|
||||||
st = active_idle;
|
st = active_idle;
|
||||||
else
|
else
|
||||||
|
@ -2455,11 +2461,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
break;
|
break;
|
||||||
case clear:
|
case clear:
|
||||||
/* stopping an active array */
|
/* stopping an active array */
|
||||||
if (mddev->pers) {
|
if (atomic_read(&mddev->active) > 1)
|
||||||
if (atomic_read(&mddev->active) > 1)
|
return -EBUSY;
|
||||||
return -EBUSY;
|
err = do_md_stop(mddev, 0);
|
||||||
err = do_md_stop(mddev, 0);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case inactive:
|
case inactive:
|
||||||
/* stopping an active array */
|
/* stopping an active array */
|
||||||
|
@ -2467,7 +2471,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
if (atomic_read(&mddev->active) > 1)
|
if (atomic_read(&mddev->active) > 1)
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
err = do_md_stop(mddev, 2);
|
err = do_md_stop(mddev, 2);
|
||||||
}
|
} else
|
||||||
|
err = 0; /* already inactive */
|
||||||
break;
|
break;
|
||||||
case suspended:
|
case suspended:
|
||||||
break; /* not supported yet */
|
break; /* not supported yet */
|
||||||
|
@ -2495,9 +2500,15 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
restart_array(mddev);
|
restart_array(mddev);
|
||||||
spin_lock_irq(&mddev->write_lock);
|
spin_lock_irq(&mddev->write_lock);
|
||||||
if (atomic_read(&mddev->writes_pending) == 0) {
|
if (atomic_read(&mddev->writes_pending) == 0) {
|
||||||
mddev->in_sync = 1;
|
if (mddev->in_sync == 0) {
|
||||||
set_bit(MD_CHANGE_CLEAN, &mddev->flags);
|
mddev->in_sync = 1;
|
||||||
}
|
if (mddev->persistent)
|
||||||
|
set_bit(MD_CHANGE_CLEAN,
|
||||||
|
&mddev->flags);
|
||||||
|
}
|
||||||
|
err = 0;
|
||||||
|
} else
|
||||||
|
err = -EBUSY;
|
||||||
spin_unlock_irq(&mddev->write_lock);
|
spin_unlock_irq(&mddev->write_lock);
|
||||||
} else {
|
} else {
|
||||||
mddev->ro = 0;
|
mddev->ro = 0;
|
||||||
|
@ -2508,7 +2519,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
case active:
|
case active:
|
||||||
if (mddev->pers) {
|
if (mddev->pers) {
|
||||||
restart_array(mddev);
|
restart_array(mddev);
|
||||||
clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
|
if (mddev->external)
|
||||||
|
clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
|
||||||
wake_up(&mddev->sb_wait);
|
wake_up(&mddev->sb_wait);
|
||||||
err = 0;
|
err = 0;
|
||||||
} else {
|
} else {
|
||||||
|
@ -2659,7 +2671,9 @@ __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
|
||||||
|
|
||||||
|
|
||||||
/* Metadata version.
|
/* Metadata version.
|
||||||
* This is either 'none' for arrays with externally managed metadata,
|
* This is one of
|
||||||
|
* 'none' for arrays with no metadata (good luck...)
|
||||||
|
* 'external' for arrays with externally managed metadata,
|
||||||
* or N.M for internally known formats
|
* or N.M for internally known formats
|
||||||
*/
|
*/
|
||||||
static ssize_t
|
static ssize_t
|
||||||
|
@ -2668,6 +2682,8 @@ metadata_show(mddev_t *mddev, char *page)
|
||||||
if (mddev->persistent)
|
if (mddev->persistent)
|
||||||
return sprintf(page, "%d.%d\n",
|
return sprintf(page, "%d.%d\n",
|
||||||
mddev->major_version, mddev->minor_version);
|
mddev->major_version, mddev->minor_version);
|
||||||
|
else if (mddev->external)
|
||||||
|
return sprintf(page, "external:%s\n", mddev->metadata_type);
|
||||||
else
|
else
|
||||||
return sprintf(page, "none\n");
|
return sprintf(page, "none\n");
|
||||||
}
|
}
|
||||||
|
@ -2682,6 +2698,21 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
|
|
||||||
if (cmd_match(buf, "none")) {
|
if (cmd_match(buf, "none")) {
|
||||||
mddev->persistent = 0;
|
mddev->persistent = 0;
|
||||||
|
mddev->external = 0;
|
||||||
|
mddev->major_version = 0;
|
||||||
|
mddev->minor_version = 90;
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
if (strncmp(buf, "external:", 9) == 0) {
|
||||||
|
int namelen = len-9;
|
||||||
|
if (namelen >= sizeof(mddev->metadata_type))
|
||||||
|
namelen = sizeof(mddev->metadata_type)-1;
|
||||||
|
strncpy(mddev->metadata_type, buf+9, namelen);
|
||||||
|
mddev->metadata_type[namelen] = 0;
|
||||||
|
if (namelen && mddev->metadata_type[namelen-1] == '\n')
|
||||||
|
mddev->metadata_type[--namelen] = 0;
|
||||||
|
mddev->persistent = 0;
|
||||||
|
mddev->external = 1;
|
||||||
mddev->major_version = 0;
|
mddev->major_version = 0;
|
||||||
mddev->minor_version = 90;
|
mddev->minor_version = 90;
|
||||||
return len;
|
return len;
|
||||||
|
@ -2698,6 +2729,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
mddev->major_version = major;
|
mddev->major_version = major;
|
||||||
mddev->minor_version = minor;
|
mddev->minor_version = minor;
|
||||||
mddev->persistent = 1;
|
mddev->persistent = 1;
|
||||||
|
mddev->external = 0;
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3524,6 +3556,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
|
||||||
mddev->raid_disks = 0;
|
mddev->raid_disks = 0;
|
||||||
mddev->recovery_cp = 0;
|
mddev->recovery_cp = 0;
|
||||||
mddev->reshape_position = MaxSector;
|
mddev->reshape_position = MaxSector;
|
||||||
|
mddev->external = 0;
|
||||||
|
|
||||||
} else if (mddev->pers)
|
} else if (mddev->pers)
|
||||||
printk(KERN_INFO "md: %s switched to read-only mode.\n",
|
printk(KERN_INFO "md: %s switched to read-only mode.\n",
|
||||||
|
@ -4165,13 +4198,15 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
|
||||||
else
|
else
|
||||||
mddev->recovery_cp = 0;
|
mddev->recovery_cp = 0;
|
||||||
mddev->persistent = ! info->not_persistent;
|
mddev->persistent = ! info->not_persistent;
|
||||||
|
mddev->external = 0;
|
||||||
|
|
||||||
mddev->layout = info->layout;
|
mddev->layout = info->layout;
|
||||||
mddev->chunk_size = info->chunk_size;
|
mddev->chunk_size = info->chunk_size;
|
||||||
|
|
||||||
mddev->max_disks = MD_SB_DISKS;
|
mddev->max_disks = MD_SB_DISKS;
|
||||||
|
|
||||||
mddev->flags = 0;
|
if (mddev->persistent)
|
||||||
|
mddev->flags = 0;
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
|
|
||||||
mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
|
mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
|
||||||
|
@ -4982,7 +5017,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
|
||||||
mddev->major_version,
|
mddev->major_version,
|
||||||
mddev->minor_version);
|
mddev->minor_version);
|
||||||
}
|
}
|
||||||
} else
|
} else if (mddev->external)
|
||||||
|
seq_printf(seq, " super external:%s",
|
||||||
|
mddev->metadata_type);
|
||||||
|
else
|
||||||
seq_printf(seq, " super non-persistent");
|
seq_printf(seq, " super non-persistent");
|
||||||
|
|
||||||
if (mddev->pers) {
|
if (mddev->pers) {
|
||||||
|
@ -5589,7 +5627,7 @@ void md_check_recovery(mddev_t *mddev)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! (
|
if ( ! (
|
||||||
mddev->flags ||
|
(mddev->flags && !mddev->external) ||
|
||||||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
|
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
|
||||||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
|
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
|
||||||
(mddev->safemode == 1) ||
|
(mddev->safemode == 1) ||
|
||||||
|
@ -5605,7 +5643,8 @@ void md_check_recovery(mddev_t *mddev)
|
||||||
if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
|
if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
|
||||||
!mddev->in_sync && mddev->recovery_cp == MaxSector) {
|
!mddev->in_sync && mddev->recovery_cp == MaxSector) {
|
||||||
mddev->in_sync = 1;
|
mddev->in_sync = 1;
|
||||||
set_bit(MD_CHANGE_CLEAN, &mddev->flags);
|
if (mddev->persistent)
|
||||||
|
set_bit(MD_CHANGE_CLEAN, &mddev->flags);
|
||||||
}
|
}
|
||||||
if (mddev->safemode == 1)
|
if (mddev->safemode == 1)
|
||||||
mddev->safemode = 0;
|
mddev->safemode = 0;
|
||||||
|
|
|
@ -130,6 +130,9 @@ struct mddev_s
|
||||||
minor_version,
|
minor_version,
|
||||||
patch_version;
|
patch_version;
|
||||||
int persistent;
|
int persistent;
|
||||||
|
int external; /* metadata is
|
||||||
|
* managed externally */
|
||||||
|
char metadata_type[17]; /* externally set*/
|
||||||
int chunk_size;
|
int chunk_size;
|
||||||
time_t ctime, utime;
|
time_t ctime, utime;
|
||||||
int level, layout;
|
int level, layout;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче