mm: disable interrupts while initializing deferred pages
Vlastimil Babka reported a window during which allocations may fail:
deferred pages are still being initialized, while the current form of
on-demand initialization has already finished.  While this is a highly
unlikely scenario, since this kind of allocation request must be large
and must come from an interrupt handler, we still want to cover it.

We solve this by initializing deferred pages with interrupts disabled,
and by holding the node_size_lock spinlock while pages in the node are
being initialized.  The on-demand deferred page initialization that
comes later will use the same lock, and thus synchronize with
deferred_init_memmap().

It is unlikely for the threads that initialize deferred pages to be
interrupted.  They run soon after smp_init(), but before modules are
initialized, and long before user space programs.  This is why there is
no adverse effect of having these threads run with interrupts disabled.

[pasha.tatashin@oracle.com: v6]
  Link: http://lkml.kernel.org/r/20180313182355.17669-2-pasha.tatashin@oracle.com
Link: http://lkml.kernel.org/r/20180309220807.24961-2-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Gioh Kim <gi-oh.kim@profitbricks.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yaowei Bai <baiyaowei@cmss.chinamobile.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Miles Chen <miles.chen@mediatek.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Parent: 8e7a0c9100
Commit: 3a2d7fa8a3
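In outline, the fix serializes both initialization paths on
pgdat->node_size_lock through the pgdat_resize_lock()/pgdat_resize_unlock()
helpers.  Below is a minimal sketch of that pattern, assuming the helper
names from the diff that follows; the elided body is illustrative, and the
on-demand caller is only described by this series, not shown here:

/* Sketch only: the per-node background init thread, simplified. */
static int __init deferred_init_memmap(void *data)
{
	pg_data_t *pgdat = data;
	unsigned long flags, first_init_pfn;

	pgdat_resize_lock(pgdat, &flags);	/* spin_lock_irqsave(): IRQs off */
	first_init_pfn = pgdat->first_deferred_pfn;
	/* ... initialize and free all deferred pages in this node ... */
	pgdat_resize_unlock(pgdat, &flags);	/* spin_unlock_irqrestore() */
	return 0;
}

The on-demand initialization added later in the series takes the same lock
before consulting first_deferred_pfn, so it runs either strictly before or
strictly after the critical section above.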
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -51,24 +51,6 @@ enum {
 	MMOP_ONLINE_MOVABLE,
 };
 
-/*
- * pgdat resizing functions
- */
-static inline
-void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
-{
-	spin_lock_irqsave(&pgdat->node_size_lock, *flags);
-}
-static inline
-void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
-{
-	spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
-}
-static inline
-void pgdat_resize_init(struct pglist_data *pgdat)
-{
-	spin_lock_init(&pgdat->node_size_lock);
-}
-
 /*
  * Zone resizing functions
  *
@@ -246,13 +228,6 @@ extern void clear_zone_contiguous(struct zone *zone);
 	___page; \
 })
 
-/*
- * Stub functions for when hotplug is off
- */
-static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
-static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
-static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
-
 static inline unsigned zone_span_seqbegin(struct zone *zone)
 {
 	return 0;
@@ -293,6 +268,34 @@ static inline bool movable_node_is_enabled(void)
 }
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
 
+#if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
+/*
+ * pgdat resizing functions
+ */
+static inline
+void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
+{
+	spin_lock_irqsave(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
+{
+	spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_init(struct pglist_data *pgdat)
+{
+	spin_lock_init(&pgdat->node_size_lock);
+}
+#else /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */
+/*
+ * Stub functions for when hotplug is off
+ */
+static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
+#endif /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 
 extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
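To summarize the restructuring above (a reading aid, not part of the patch):
pgdat_resize_lock() and friends are now real spinlock operations whenever
either option is enabled, and empty stubs only when both are off.
Previously, a kernel built with CONFIG_DEFERRED_STRUCT_PAGE_INIT but without
CONFIG_MEMORY_HOTPLUG would have picked up the stubs, making the new locking
in deferred_init_memmap() a no-op.

/*
 * Which definition a given .config now selects:
 *
 *   CONFIG_MEMORY_HOTPLUG   CONFIG_DEFERRED_STRUCT_PAGE_INIT   pgdat_resize_lock()
 *   y                       any                                spin_lock_irqsave()
 *   n                       y                                  spin_lock_irqsave()  (new)
 *   n                       n                                  empty stub
 */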
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -633,14 +633,15 @@ typedef struct pglist_data {
 #ifndef CONFIG_NO_BOOTMEM
 	struct bootmem_data *bdata;
 #endif
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
 	/*
 	 * Must be held any time you expect node_start_pfn, node_present_pages
 	 * or node_spanned_pages stay constant.  Holding this will also
 	 * guarantee that any pfn_valid() stays that way.
 	 *
 	 * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
-	 * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG.
+	 * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
+	 * or CONFIG_DEFERRED_STRUCT_PAGE_INIT.
 	 *
 	 * Nests above zone->lock and zone->span_seqlock
 	 */
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1506,7 +1506,7 @@ static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
 		} else if (!(pfn & nr_pgmask)) {
 			deferred_free_range(pfn - nr_free, nr_free);
 			nr_free = 1;
-			cond_resched();
+			touch_nmi_watchdog();
 		} else {
 			nr_free++;
 		}
@@ -1535,7 +1535,7 @@ static unsigned long __init deferred_init_pages(int nid, int zid,
 			continue;
 		} else if (!page || !(pfn & nr_pgmask)) {
 			page = pfn_to_page(pfn);
-			cond_resched();
+			touch_nmi_watchdog();
 		} else {
 			page++;
 		}
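Why the two cond_resched() calls are replaced: these loops now execute under
pgdat_resize_lock(), that is, inside a spinlock with interrupts disabled,
where sleeping (and therefore cond_resched()) is not allowed.  Touching the
NMI watchdog instead keeps the hard-lockup detector from firing on nodes
with very large memmaps.  A condensed sketch of the resulting loop shape,
paraphrased from deferred_free_pages() in this kernel version, with the
unchanged branch bodies elided:

	for (; pfn < end_pfn; pfn++) {
		if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
			/* ... flush the accumulated run of free pages ... */
		} else if (!(pfn & nr_pgmask)) {
			deferred_free_range(pfn - nr_free, nr_free);
			nr_free = 1;
			touch_nmi_watchdog();	/* must not sleep with IRQs off */
		} else {
			nr_free++;
		}
	}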
@@ -1552,23 +1552,25 @@ static int __init deferred_init_memmap(void *data)
 	int nid = pgdat->node_id;
 	unsigned long start = jiffies;
 	unsigned long nr_pages = 0;
-	unsigned long spfn, epfn;
+	unsigned long spfn, epfn, first_init_pfn, flags;
 	phys_addr_t spa, epa;
 	int zid;
 	struct zone *zone;
-	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 	u64 i;
 
-	if (first_init_pfn == ULONG_MAX) {
-		pgdat_init_report_one_done();
-		return 0;
-	}
-
 	/* Bind memory initialisation thread to a local node if possible */
 	if (!cpumask_empty(cpumask))
 		set_cpus_allowed_ptr(current, cpumask);
 
+	pgdat_resize_lock(pgdat, &flags);
+	first_init_pfn = pgdat->first_deferred_pfn;
+	if (first_init_pfn == ULONG_MAX) {
+		pgdat_resize_unlock(pgdat, &flags);
+		pgdat_init_report_one_done();
+		return 0;
+	}
+
 	/* Sanity check boundaries */
 	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
 	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
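Two details in the hunk above are worth calling out: first_init_pfn is now
read only after the lock is taken, since the later on-demand path updates
first_deferred_pfn under the same lock, and the ULONG_MAX early exit has to
drop the lock on its way out.  The shape of that early-exit path, restated
with comments:

	pgdat_resize_lock(pgdat, &flags);		/* IRQs disabled from here */
	first_init_pfn = pgdat->first_deferred_pfn;	/* read under the lock */
	if (first_init_pfn == ULONG_MAX) {
		pgdat_resize_unlock(pgdat, &flags);	/* every exit path must unlock */
		pgdat_init_report_one_done();
		return 0;
	}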
@@ -1598,6 +1600,7 @@ static int __init deferred_init_memmap(void *data)
 		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
 		deferred_free_pages(nid, zid, spfn, epfn);
 	}
+	pgdat_resize_unlock(pgdat, &flags);
 
 	/* Sanity check that the next zone really is unpopulated */
 	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));