ksm: stop hotremove lockdep warning
Complaints are rare, but lockdep still does not understand the way ksm_memory_callback(MEM_GOING_OFFLINE) takes ksm_thread_mutex, and holds it until the ksm_memory_callback(MEM_OFFLINE): that appears to be a problem because notifier callbacks are made under down_read of blocking_notifier_head->rwsem (so first the mutex is taken while holding the rwsem, then later the rwsem is taken while still holding the mutex); but is not in fact a problem because mem_hotplug_mutex is held throughout the dance. There was an attempt to fix this with mutex_lock_nested(); but if that happened to fool lockdep two years ago, apparently it does so no longer. I had hoped to eradicate this issue in extending KSM page migration not to need the ksm_thread_mutex. But then realized that although the page migration itself is safe, we do still need to lock out ksmd and other users of get_ksm_page() while offlining memory - at some point between MEM_GOING_OFFLINE and MEM_OFFLINE, the struct pages themselves may vanish, and get_ksm_page()'s accesses to them become a violation. So, give up on holding ksm_thread_mutex itself from MEM_GOING_OFFLINE to MEM_OFFLINE, and add a KSM_RUN_OFFLINE flag, and wait_while_offlining() checks, to achieve the same lockout without being caught by lockdep. This is less elegant for KSM, but it's more important to keep lockdep useful to other users - and I apologize for how long it took to fix. Signed-off-by: Hugh Dickins <hughd@google.com> Reported-by: Gerald Schaefer <gerald.schaefer@de.ibm.com> Tested-by: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: Rik van Riel <riel@redhat.com> Cc: Petr Holasek <pholasek@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Izik Eidus <izik.eidus@ravellosystems.com> Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Родитель
9c620e2bc5
Коммит
ef4d43a807
55
mm/ksm.c
55
mm/ksm.c
|
@ -226,7 +226,9 @@ static unsigned int ksm_merge_across_nodes = 1;
|
||||||
#define KSM_RUN_STOP 0
|
#define KSM_RUN_STOP 0
|
||||||
#define KSM_RUN_MERGE 1
|
#define KSM_RUN_MERGE 1
|
||||||
#define KSM_RUN_UNMERGE 2
|
#define KSM_RUN_UNMERGE 2
|
||||||
static unsigned int ksm_run = KSM_RUN_STOP;
|
#define KSM_RUN_OFFLINE 4
|
||||||
|
static unsigned long ksm_run = KSM_RUN_STOP;
|
||||||
|
static void wait_while_offlining(void);
|
||||||
|
|
||||||
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
|
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
|
||||||
static DEFINE_MUTEX(ksm_thread_mutex);
|
static DEFINE_MUTEX(ksm_thread_mutex);
|
||||||
|
@ -1700,6 +1702,7 @@ static int ksm_scan_thread(void *nothing)
|
||||||
|
|
||||||
while (!kthread_should_stop()) {
|
while (!kthread_should_stop()) {
|
||||||
mutex_lock(&ksm_thread_mutex);
|
mutex_lock(&ksm_thread_mutex);
|
||||||
|
wait_while_offlining();
|
||||||
if (ksmd_should_run())
|
if (ksmd_should_run())
|
||||||
ksm_do_scan(ksm_thread_pages_to_scan);
|
ksm_do_scan(ksm_thread_pages_to_scan);
|
||||||
mutex_unlock(&ksm_thread_mutex);
|
mutex_unlock(&ksm_thread_mutex);
|
||||||
|
@ -2056,6 +2059,22 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage)
|
||||||
#endif /* CONFIG_MIGRATION */
|
#endif /* CONFIG_MIGRATION */
|
||||||
|
|
||||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||||
|
static int just_wait(void *word)
|
||||||
|
{
|
||||||
|
schedule();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void wait_while_offlining(void)
|
||||||
|
{
|
||||||
|
while (ksm_run & KSM_RUN_OFFLINE) {
|
||||||
|
mutex_unlock(&ksm_thread_mutex);
|
||||||
|
wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
|
||||||
|
just_wait, TASK_UNINTERRUPTIBLE);
|
||||||
|
mutex_lock(&ksm_thread_mutex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void ksm_check_stable_tree(unsigned long start_pfn,
|
static void ksm_check_stable_tree(unsigned long start_pfn,
|
||||||
unsigned long end_pfn)
|
unsigned long end_pfn)
|
||||||
{
|
{
|
||||||
|
@ -2098,15 +2117,15 @@ static int ksm_memory_callback(struct notifier_block *self,
|
||||||
switch (action) {
|
switch (action) {
|
||||||
case MEM_GOING_OFFLINE:
|
case MEM_GOING_OFFLINE:
|
||||||
/*
|
/*
|
||||||
* Keep it very simple for now: just lock out ksmd and
|
* Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
|
||||||
* MADV_UNMERGEABLE while any memory is going offline.
|
* and remove_all_stable_nodes() while memory is going offline:
|
||||||
* mutex_lock_nested() is necessary because lockdep was alarmed
|
* it is unsafe for them to touch the stable tree at this time.
|
||||||
* that here we take ksm_thread_mutex inside notifier chain
|
* But unmerge_ksm_pages(), rmap lookups and other entry points
|
||||||
* mutex, and later take notifier chain mutex inside
|
* which do not need the ksm_thread_mutex are all safe.
|
||||||
* ksm_thread_mutex to unlock it. But that's safe because both
|
|
||||||
* are inside mem_hotplug_mutex.
|
|
||||||
*/
|
*/
|
||||||
mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
|
mutex_lock(&ksm_thread_mutex);
|
||||||
|
ksm_run |= KSM_RUN_OFFLINE;
|
||||||
|
mutex_unlock(&ksm_thread_mutex);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case MEM_OFFLINE:
|
case MEM_OFFLINE:
|
||||||
|
@ -2122,11 +2141,20 @@ static int ksm_memory_callback(struct notifier_block *self,
|
||||||
/* fallthrough */
|
/* fallthrough */
|
||||||
|
|
||||||
case MEM_CANCEL_OFFLINE:
|
case MEM_CANCEL_OFFLINE:
|
||||||
|
mutex_lock(&ksm_thread_mutex);
|
||||||
|
ksm_run &= ~KSM_RUN_OFFLINE;
|
||||||
mutex_unlock(&ksm_thread_mutex);
|
mutex_unlock(&ksm_thread_mutex);
|
||||||
|
|
||||||
|
smp_mb(); /* wake_up_bit advises this */
|
||||||
|
wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return NOTIFY_OK;
|
return NOTIFY_OK;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
static void wait_while_offlining(void)
|
||||||
|
{
|
||||||
|
}
|
||||||
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
||||||
|
|
||||||
#ifdef CONFIG_SYSFS
|
#ifdef CONFIG_SYSFS
|
||||||
|
@ -2189,7 +2217,7 @@ KSM_ATTR(pages_to_scan);
|
||||||
static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
|
static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||||
char *buf)
|
char *buf)
|
||||||
{
|
{
|
||||||
return sprintf(buf, "%u\n", ksm_run);
|
return sprintf(buf, "%lu\n", ksm_run);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
|
static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||||
|
@ -2212,6 +2240,7 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||||
*/
|
*/
|
||||||
|
|
||||||
mutex_lock(&ksm_thread_mutex);
|
mutex_lock(&ksm_thread_mutex);
|
||||||
|
wait_while_offlining();
|
||||||
if (ksm_run != flags) {
|
if (ksm_run != flags) {
|
||||||
ksm_run = flags;
|
ksm_run = flags;
|
||||||
if (flags & KSM_RUN_UNMERGE) {
|
if (flags & KSM_RUN_UNMERGE) {
|
||||||
|
@ -2254,6 +2283,7 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
mutex_lock(&ksm_thread_mutex);
|
mutex_lock(&ksm_thread_mutex);
|
||||||
|
wait_while_offlining();
|
||||||
if (ksm_merge_across_nodes != knob) {
|
if (ksm_merge_across_nodes != knob) {
|
||||||
if (ksm_pages_shared || remove_all_stable_nodes())
|
if (ksm_pages_shared || remove_all_stable_nodes())
|
||||||
err = -EBUSY;
|
err = -EBUSY;
|
||||||
|
@ -2366,10 +2396,7 @@ static int __init ksm_init(void)
|
||||||
#endif /* CONFIG_SYSFS */
|
#endif /* CONFIG_SYSFS */
|
||||||
|
|
||||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||||
/*
|
/* There is no significance to this priority 100 */
|
||||||
* Choose a high priority since the callback takes ksm_thread_mutex:
|
|
||||||
* later callbacks could only be taking locks which nest within that.
|
|
||||||
*/
|
|
||||||
hotplug_memory_notifier(ksm_memory_callback, 100);
|
hotplug_memory_notifier(ksm_memory_callback, 100);
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче