md: Protect access to mddev->disks list using RCU
All modifications and most accesses to the mddev->disks list are made under the reconfig_mutex lock. However, there are three places where the list is walked without any locking. If a reconfig happens at this time, havoc (and an oops) can ensue. So use RCU to protect these accesses: - wrap them in rcu_read_{,un}lock() - use list_for_each_entry_rcu - add to the list with list_add_rcu - delete from the list with list_del_rcu - delay the 'free' by calling synchronize_rcu before the existing schedule_work, so that it cannot run until all RCU readers have finished. Note that export_rdev did a list_del_init on this list. In almost all cases the entry was not in the list anymore, so it was a no-op and so safe. It is no longer safe, as after list_del_rcu we may not touch the list_head. An audit shows that export_rdev is called: - after unbind_rdev_from_array, in which case the delete has already been done, - after bind_rdev_to_array fails, in which case the delete isn't needed, - before the device has been put on a list at all (e.g. in add_new_disk where reading the superblock fails), - and in autorun_devices after a failure, when the device is on a different list. So remove the list_del_init call from export_rdev, and add it back immediately before the call to export_rdev for that last case. Note also that ->same_set is sometimes used for lists other than mddev->disks (e.g. candidates). In these cases RCU is not needed. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
Родитель
f2ea68cf42
Коммит
4b80991c6c
|
@ -241,10 +241,10 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
|
|||
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
{
|
||||
mdk_rdev_t *rdev;
|
||||
struct list_head *tmp;
|
||||
mddev_t *mddev = bitmap->mddev;
|
||||
|
||||
rdev_for_each(rdev, tmp, mddev)
|
||||
rcu_read_lock();
|
||||
rdev_for_each_rcu(rdev, mddev)
|
||||
if (test_bit(In_sync, &rdev->flags)
|
||||
&& !test_bit(Faulty, &rdev->flags)) {
|
||||
int size = PAGE_SIZE;
|
||||
|
@ -260,11 +260,11 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
|||
+ (long)(page->index * (PAGE_SIZE/512))
|
||||
+ size/512 > 0)
|
||||
/* bitmap runs in to metadata */
|
||||
return -EINVAL;
|
||||
goto bad_alignment;
|
||||
if (rdev->data_offset + mddev->size*2
|
||||
> rdev->sb_start + bitmap->offset)
|
||||
/* data runs in to bitmap */
|
||||
return -EINVAL;
|
||||
goto bad_alignment;
|
||||
} else if (rdev->sb_start < rdev->data_offset) {
|
||||
/* METADATA BITMAP DATA */
|
||||
if (rdev->sb_start
|
||||
|
@ -272,7 +272,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
|||
+ page->index*(PAGE_SIZE/512) + size/512
|
||||
> rdev->data_offset)
|
||||
/* bitmap runs in to data */
|
||||
return -EINVAL;
|
||||
goto bad_alignment;
|
||||
} else {
|
||||
/* DATA METADATA BITMAP - no problems */
|
||||
}
|
||||
|
@ -282,10 +282,15 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
|||
size,
|
||||
page);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (wait)
|
||||
md_super_wait(mddev);
|
||||
return 0;
|
||||
|
||||
bad_alignment:
|
||||
rcu_read_unlock();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void bitmap_file_kick(struct bitmap *bitmap);
|
||||
|
|
|
@ -1395,15 +1395,17 @@ static struct super_type super_types[] = {
|
|||
|
||||
static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
|
||||
{
|
||||
struct list_head *tmp, *tmp2;
|
||||
mdk_rdev_t *rdev, *rdev2;
|
||||
|
||||
rdev_for_each(rdev, tmp, mddev1)
|
||||
rdev_for_each(rdev2, tmp2, mddev2)
|
||||
rcu_read_lock();
|
||||
rdev_for_each_rcu(rdev, mddev1)
|
||||
rdev_for_each_rcu(rdev2, mddev2)
|
||||
if (rdev->bdev->bd_contains ==
|
||||
rdev2->bdev->bd_contains)
|
||||
rdev2->bdev->bd_contains) {
|
||||
rcu_read_unlock();
|
||||
return 1;
|
||||
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1470,7 +1472,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
|
|||
kobject_del(&rdev->kobj);
|
||||
goto fail;
|
||||
}
|
||||
list_add(&rdev->same_set, &mddev->disks);
|
||||
list_add_rcu(&rdev->same_set, &mddev->disks);
|
||||
bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
|
||||
return 0;
|
||||
|
||||
|
@ -1495,14 +1497,16 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
|
|||
return;
|
||||
}
|
||||
bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
|
||||
list_del_init(&rdev->same_set);
|
||||
list_del_rcu(&rdev->same_set);
|
||||
printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
|
||||
rdev->mddev = NULL;
|
||||
sysfs_remove_link(&rdev->kobj, "block");
|
||||
|
||||
/* We need to delay this, otherwise we can deadlock when
|
||||
* writing to 'remove' to "dev/state"
|
||||
* writing to 'remove' to "dev/state". We also need
|
||||
* to delay it due to rcu usage.
|
||||
*/
|
||||
synchronize_rcu();
|
||||
INIT_WORK(&rdev->del_work, md_delayed_delete);
|
||||
kobject_get(&rdev->kobj);
|
||||
schedule_work(&rdev->del_work);
|
||||
|
@ -1558,7 +1562,6 @@ static void export_rdev(mdk_rdev_t * rdev)
|
|||
if (rdev->mddev)
|
||||
MD_BUG();
|
||||
free_disk_sb(rdev);
|
||||
list_del_init(&rdev->same_set);
|
||||
#ifndef MODULE
|
||||
if (test_bit(AutoDetected, &rdev->flags))
|
||||
md_autodetect_dev(rdev->bdev->bd_dev);
|
||||
|
@ -4062,8 +4065,10 @@ static void autorun_devices(int part)
|
|||
/* on success, candidates will be empty, on error
|
||||
* it won't...
|
||||
*/
|
||||
rdev_for_each_list(rdev, tmp, candidates)
|
||||
rdev_for_each_list(rdev, tmp, candidates) {
|
||||
list_del_init(&rdev->same_set);
|
||||
export_rdev(rdev);
|
||||
}
|
||||
mddev_put(mddev);
|
||||
}
|
||||
printk(KERN_INFO "md: ... autorun DONE.\n");
|
||||
|
@ -5529,12 +5534,12 @@ int unregister_md_personality(struct mdk_personality *p)
|
|||
static int is_mddev_idle(mddev_t *mddev)
|
||||
{
|
||||
mdk_rdev_t * rdev;
|
||||
struct list_head *tmp;
|
||||
int idle;
|
||||
long curr_events;
|
||||
|
||||
idle = 1;
|
||||
rdev_for_each(rdev, tmp, mddev) {
|
||||
rcu_read_lock();
|
||||
rdev_for_each_rcu(rdev, mddev) {
|
||||
struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
|
||||
curr_events = disk_stat_read(disk, sectors[0]) +
|
||||
disk_stat_read(disk, sectors[1]) -
|
||||
|
@ -5566,6 +5571,7 @@ static int is_mddev_idle(mddev_t *mddev)
|
|||
idle = 0;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return idle;
|
||||
}
|
||||
|
||||
|
|
|
@ -339,6 +339,9 @@ static inline char * mdname (mddev_t * mddev)
|
|||
#define rdev_for_each(rdev, tmp, mddev) \
|
||||
rdev_for_each_list(rdev, tmp, (mddev)->disks)
|
||||
|
||||
#define rdev_for_each_rcu(rdev, mddev) \
|
||||
list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
|
||||
|
||||
typedef struct mdk_thread_s {
|
||||
void (*run) (mddev_t *mddev);
|
||||
mddev_t *mddev;
|
||||
|
|
Загрузка…
Ссылка в новой задаче