metadata_update sends message to other nodes

- request to send a message
   - make changes to superblock
   - send messages telling everyone that the superblock has changed
   - other nodes all read the superblock
   - other nodes all ack the messages
   - updating node release the "I'm sending a message" resource.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
This commit is contained in:
Goldwyn Rodrigues 2014-06-07 01:44:51 -05:00
Родитель 601b515c5d
Коммит 293467aa1f
3 изменённых файлов: 106 добавлений и 14 удалений

Просмотреть файл

@ -621,11 +621,39 @@ static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
}
static int metadata_update_start(struct mddev *mddev)
{
return lock_comm(mddev->cluster_info);
}
static int metadata_update_finish(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
struct cluster_msg cmsg;
int ret;
memset(&cmsg, 0, sizeof(cmsg));
cmsg.type = cpu_to_le32(METADATA_UPDATED);
ret = __sendmsg(cinfo, &cmsg);
unlock_comm(cinfo);
return ret;
}
static int metadata_update_cancel(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
return dlm_unlock_sync(cinfo->token_lockres);
}
static struct md_cluster_operations cluster_ops = {
.join = join,
.leave = leave,
.slot_number = slot_number,
.resync_info_update = resync_info_update,
.metadata_update_start = metadata_update_start,
.metadata_update_finish = metadata_update_finish,
.metadata_update_cancel = metadata_update_cancel,
};
static int __init cluster_init(void)

Просмотреть файл

@ -12,6 +12,9 @@ struct md_cluster_operations {
int (*leave)(struct mddev *mddev);
int (*slot_number)(struct mddev *mddev);
void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
int (*metadata_update_start)(struct mddev *mddev);
int (*metadata_update_finish)(struct mddev *mddev);
int (*metadata_update_cancel)(struct mddev *mddev);
};
#endif /* _MD_CLUSTER_H */

Просмотреть файл

@ -2472,10 +2472,14 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
err = -EBUSY;
else {
struct mddev *mddev = rdev->mddev;
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_start(mddev);
kick_rdev_from_array(rdev);
if (mddev->pers)
md_update_sb(mddev, 1);
md_new_event(mddev);
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_finish(mddev);
err = 0;
}
} else if (cmd_match(buf, "writemostly")) {
@ -4008,8 +4012,12 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
if (err)
return err;
if (mddev->pers) {
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_start(mddev);
err = update_size(mddev, sectors);
md_update_sb(mddev, 1);
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_finish(mddev);
} else {
if (mddev->dev_sectors == 0 ||
mddev->dev_sectors > sectors)
@ -5236,6 +5244,8 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_start(mddev);
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
@ -5254,6 +5264,8 @@ static void __md_stop_writes(struct mddev *mddev)
mddev->in_sync = 1;
md_update_sb(mddev, 1);
}
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_finish(mddev);
}
void md_stop_writes(struct mddev *mddev)
@ -5902,6 +5914,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
if (!rdev)
return -ENXIO;
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_start(mddev);
clear_bit(Blocked, &rdev->flags);
remove_and_add_spares(mddev, rdev);
@ -5912,8 +5927,13 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
md_update_sb(mddev, 1);
md_new_event(mddev);
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_finish(mddev);
return 0;
busy:
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_cancel(mddev);
printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
bdevname(rdev->bdev,b), mdname(mddev));
return -EBUSY;
@ -5963,12 +5983,15 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
err = -EINVAL;
goto abort_export;
}
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_start(mddev);
clear_bit(In_sync, &rdev->flags);
rdev->desc_nr = -1;
rdev->saved_raid_disk = -1;
err = bind_rdev_to_array(rdev, mddev);
if (err)
goto abort_export;
goto abort_clustered;
/*
* The rest should better be atomic, we can have disk failures
@ -5979,6 +6002,8 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
md_update_sb(mddev, 1);
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_finish(mddev);
/*
* Kick recovery, maybe this spare has to be added to the
* array immediately.
@ -5988,6 +6013,9 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
md_new_event(mddev);
return 0;
abort_clustered:
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_cancel(mddev);
abort_export:
export_rdev(rdev);
return err;
@ -6304,6 +6332,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
return rv;
}
}
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_start(mddev);
if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
rv = update_size(mddev, (sector_t)info->size * 2);
@ -6311,17 +6341,25 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
rv = update_raid_disks(mddev, info->raid_disks);
if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
if (mddev->pers->quiesce == NULL || mddev->thread == NULL)
return -EINVAL;
if (mddev->recovery || mddev->sync_thread)
return -EBUSY;
if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
rv = -EINVAL;
goto err;
}
if (mddev->recovery || mddev->sync_thread) {
rv = -EBUSY;
goto err;
}
if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
struct bitmap *bitmap;
/* add the bitmap */
if (mddev->bitmap)
return -EEXIST;
if (mddev->bitmap_info.default_offset == 0)
return -EINVAL;
if (mddev->bitmap) {
rv = -EEXIST;
goto err;
}
if (mddev->bitmap_info.default_offset == 0) {
rv = -EINVAL;
goto err;
}
mddev->bitmap_info.offset =
mddev->bitmap_info.default_offset;
mddev->bitmap_info.space =
@ -6337,10 +6375,14 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->pers->quiesce(mddev, 0);
} else {
/* remove the bitmap */
if (!mddev->bitmap)
return -ENOENT;
if (mddev->bitmap->storage.file)
return -EINVAL;
if (!mddev->bitmap) {
rv = -ENOENT;
goto err;
}
if (mddev->bitmap->storage.file) {
rv = -EINVAL;
goto err;
}
mddev->pers->quiesce(mddev, 1);
bitmap_destroy(mddev);
mddev->pers->quiesce(mddev, 0);
@ -6348,6 +6390,12 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
}
}
md_update_sb(mddev, 1);
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_finish(mddev);
return rv;
err:
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_cancel(mddev);
return rv;
}
@ -7438,7 +7486,11 @@ int md_allow_write(struct mddev *mddev)
mddev->safemode == 0)
mddev->safemode = 1;
spin_unlock(&mddev->lock);
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_start(mddev);
md_update_sb(mddev, 0);
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_finish(mddev);
sysfs_notify_dirent_safe(mddev->sysfs_state);
} else
spin_unlock(&mddev->lock);
@ -7996,8 +8048,13 @@ void md_check_recovery(struct mddev *mddev)
sysfs_notify_dirent_safe(mddev->sysfs_state);
}
if (mddev->flags & MD_UPDATE_SB_FLAGS)
if (mddev->flags & MD_UPDATE_SB_FLAGS) {
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_start(mddev);
md_update_sb(mddev, 0);
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_finish(mddev);
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
@ -8095,6 +8152,8 @@ void md_reap_sync_thread(struct mddev *mddev)
set_bit(MD_CHANGE_DEVS, &mddev->flags);
}
}
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_start(mddev);
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
mddev->pers->finish_reshape)
mddev->pers->finish_reshape(mddev);
@ -8107,6 +8166,8 @@ void md_reap_sync_thread(struct mddev *mddev)
rdev->saved_raid_disk = -1;
md_update_sb(mddev, 1);
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_finish(mddev);
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);