2005-10-30 04:16:52 +03:00
|
|
|
#ifndef __LINUX_MEMORY_HOTPLUG_H
|
|
|
|
#define __LINUX_MEMORY_HOTPLUG_H
|
|
|
|
|
|
|
|
#include <linux/mmzone.h>
|
|
|
|
#include <linux/spinlock.h>
|
2005-10-30 04:16:54 +03:00
|
|
|
#include <linux/notifier.h>
|
2011-11-24 05:12:59 +04:00
|
|
|
#include <linux/bug.h>
|
2005-10-30 04:16:52 +03:00
|
|
|
|
2006-03-07 02:42:49 +03:00
|
|
|
struct page;
|
|
|
|
struct zone;
|
|
|
|
struct pglist_data;
|
2008-04-28 13:12:01 +04:00
|
|
|
struct mem_section;
|
2012-10-09 03:34:01 +04:00
|
|
|
struct memory_block;
|
2015-06-25 18:35:49 +03:00
|
|
|
struct resource;
|
2006-03-07 02:42:49 +03:00
|
|
|
|
2005-10-30 04:16:52 +03:00
|
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocater. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allcated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 13:13:31 +04:00
|
|
|
|
|
|
|
/*
|
2011-01-14 02:47:00 +03:00
|
|
|
* Types for free bootmem stored in page->lru.next. These have to be in
|
|
|
|
* some random range in unsigned long space for debugging purposes.
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocater. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allcated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 13:13:31 +04:00
|
|
|
*/
|
2011-01-14 02:47:00 +03:00
|
|
|
enum {
|
|
|
|
MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
|
|
|
|
SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
|
|
|
|
MIX_SECTION_INFO,
|
|
|
|
NODE_INFO,
|
|
|
|
MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
|
|
|
|
};
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocater. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allcated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 13:13:31 +04:00
|
|
|
|
2014-08-07 03:05:13 +04:00
|
|
|
/* Types for control the zone type of onlined and offlined memory */
|
mm, memory-hotplug: dynamic configure movable memory and portion memory
Add online_movable and online_kernel for logic memory hotplug. This is
the dynamic version of "movablecore" & "kernelcore".
We have the same reason to introduce it as to introduce "movablecore" &
"kernelcore". It has the same motive as "movablecore" & "kernelcore", but
it is dynamic/running-time:
o We can configure memory as kernelcore or movablecore after boot.
Userspace workload is increased, we need more hugepage, we can't use
"online_movable" to add memory and allow the system use more
THP(transparent-huge-page), vice-verse when kernel workload is increase.
Also help for virtualization to dynamic configure host/guest's memory,
to save/(reduce waste) memory.
Memory capacity on Demand
o When a new node is physically online after boot, we need to use
"online_movable" or "online_kernel" to configure/portion it as we
expected when we logic-online it.
This configuration also helps for physically-memory-migrate.
o all benefit as the same as existed "movablecore" & "kernelcore".
o Preparing for movable-node, which is very important for power-saving,
hardware partitioning and high-available-system(hardware fault
management).
(Note, we don't introduce movable-node here.)
Action behavior:
When a memoryblock/memorysection is onlined by "online_movable", the kernel
will not have directly reference to the page of the memoryblock,
thus we can remove that memory any time when needed.
When it is online by "online_kernel", the kernel can use it.
When it is online by "online", the zone type doesn't changed.
Current constraints:
Only the memoryblock which is adjacent to the ZONE_MOVABLE
can be online from ZONE_NORMAL to ZONE_MOVABLE.
[akpm@linux-foundation.org: use min_t, cleanups]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-12-12 04:03:16 +04:00
|
|
|
enum {
|
2014-08-07 03:05:13 +04:00
|
|
|
MMOP_OFFLINE = -1,
|
|
|
|
MMOP_ONLINE_KEEP,
|
|
|
|
MMOP_ONLINE_KERNEL,
|
|
|
|
MMOP_ONLINE_MOVABLE,
|
mm, memory-hotplug: dynamic configure movable memory and portion memory
Add online_movable and online_kernel for logic memory hotplug. This is
the dynamic version of "movablecore" & "kernelcore".
We have the same reason to introduce it as to introduce "movablecore" &
"kernelcore". It has the same motive as "movablecore" & "kernelcore", but
it is dynamic/running-time:
o We can configure memory as kernelcore or movablecore after boot.
Userspace workload is increased, we need more hugepage, we can't use
"online_movable" to add memory and allow the system use more
THP(transparent-huge-page), vice-verse when kernel workload is increase.
Also help for virtualization to dynamic configure host/guest's memory,
to save/(reduce waste) memory.
Memory capacity on Demand
o When a new node is physically online after boot, we need to use
"online_movable" or "online_kernel" to configure/portion it as we
expected when we logic-online it.
This configuration also helps for physically-memory-migrate.
o all benefit as the same as existed "movablecore" & "kernelcore".
o Preparing for movable-node, which is very important for power-saving,
hardware partitioning and high-available-system(hardware fault
management).
(Note, we don't introduce movable-node here.)
Action behavior:
When a memoryblock/memorysection is onlined by "online_movable", the kernel
will not have directly reference to the page of the memoryblock,
thus we can remove that memory any time when needed.
When it is online by "online_kernel", the kernel can use it.
When it is online by "online", the zone type doesn't changed.
Current constraints:
Only the memoryblock which is adjacent to the ZONE_MOVABLE
can be online from ZONE_NORMAL to ZONE_MOVABLE.
[akpm@linux-foundation.org: use min_t, cleanups]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-12-12 04:03:16 +04:00
|
|
|
};
|
|
|
|
|
2005-10-30 04:16:52 +03:00
|
|
|
/*
|
|
|
|
* pgdat resizing functions
|
|
|
|
*/
|
|
|
|
static inline
|
|
|
|
void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
|
|
|
|
{
|
|
|
|
spin_lock_irqsave(&pgdat->node_size_lock, *flags);
|
|
|
|
}
|
|
|
|
static inline
|
|
|
|
void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
|
|
|
|
{
|
2005-10-30 04:16:53 +03:00
|
|
|
spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
|
2005-10-30 04:16:52 +03:00
|
|
|
}
|
|
|
|
static inline
|
|
|
|
void pgdat_resize_init(struct pglist_data *pgdat)
|
|
|
|
{
|
|
|
|
spin_lock_init(&pgdat->node_size_lock);
|
|
|
|
}
|
2005-10-30 04:16:53 +03:00
|
|
|
/*
|
|
|
|
* Zone resizing functions
|
mm, memory-hotplug: dynamic configure movable memory and portion memory
Add online_movable and online_kernel for logic memory hotplug. This is
the dynamic version of "movablecore" & "kernelcore".
We have the same reason to introduce it as to introduce "movablecore" &
"kernelcore". It has the same motive as "movablecore" & "kernelcore", but
it is dynamic/running-time:
o We can configure memory as kernelcore or movablecore after boot.
Userspace workload is increased, we need more hugepage, we can't use
"online_movable" to add memory and allow the system use more
THP(transparent-huge-page), vice-verse when kernel workload is increase.
Also help for virtualization to dynamic configure host/guest's memory,
to save/(reduce waste) memory.
Memory capacity on Demand
o When a new node is physically online after boot, we need to use
"online_movable" or "online_kernel" to configure/portion it as we
expected when we logic-online it.
This configuration also helps for physically-memory-migrate.
o all benefit as the same as existed "movablecore" & "kernelcore".
o Preparing for movable-node, which is very important for power-saving,
hardware partitioning and high-available-system(hardware fault
management).
(Note, we don't introduce movable-node here.)
Action behavior:
When a memoryblock/memorysection is onlined by "online_movable", the kernel
will not have directly reference to the page of the memoryblock,
thus we can remove that memory any time when needed.
When it is online by "online_kernel", the kernel can use it.
When it is online by "online", the zone type doesn't changed.
Current constraints:
Only the memoryblock which is adjacent to the ZONE_MOVABLE
can be online from ZONE_NORMAL to ZONE_MOVABLE.
[akpm@linux-foundation.org: use min_t, cleanups]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-12-12 04:03:16 +04:00
|
|
|
*
|
|
|
|
* Note: any attempt to resize a zone should has pgdat_resize_lock()
|
|
|
|
* zone_span_writelock() both held. This ensure the size of a zone
|
|
|
|
* can't be changed while pgdat_resize_lock() held.
|
2005-10-30 04:16:53 +03:00
|
|
|
*/
|
|
|
|
static inline unsigned zone_span_seqbegin(struct zone *zone)
|
|
|
|
{
|
|
|
|
return read_seqbegin(&zone->span_seqlock);
|
|
|
|
}
|
|
|
|
static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
|
|
|
|
{
|
|
|
|
return read_seqretry(&zone->span_seqlock, iv);
|
|
|
|
}
|
|
|
|
static inline void zone_span_writelock(struct zone *zone)
|
|
|
|
{
|
|
|
|
write_seqlock(&zone->span_seqlock);
|
|
|
|
}
|
|
|
|
static inline void zone_span_writeunlock(struct zone *zone)
|
|
|
|
{
|
|
|
|
write_sequnlock(&zone->span_seqlock);
|
|
|
|
}
|
|
|
|
static inline void zone_seqlock_init(struct zone *zone)
|
|
|
|
{
|
|
|
|
seqlock_init(&zone->span_seqlock);
|
|
|
|
}
|
2005-10-30 04:16:54 +03:00
|
|
|
extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
|
|
|
|
extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
|
|
|
|
extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
|
|
|
|
/* VM interface that may be used by firmware interface */
|
mm, memory-hotplug: dynamic configure movable memory and portion memory
Add online_movable and online_kernel for logic memory hotplug. This is
the dynamic version of "movablecore" & "kernelcore".
We have the same reason to introduce it as to introduce "movablecore" &
"kernelcore". It has the same motive as "movablecore" & "kernelcore", but
it is dynamic/running-time:
o We can configure memory as kernelcore or movablecore after boot.
Userspace workload is increased, we need more hugepage, we can't use
"online_movable" to add memory and allow the system use more
THP(transparent-huge-page), vice-verse when kernel workload is increase.
Also help for virtualization to dynamic configure host/guest's memory,
to save/(reduce waste) memory.
Memory capacity on Demand
o When a new node is physically online after boot, we need to use
"online_movable" or "online_kernel" to configure/portion it as we
expected when we logic-online it.
This configuration also helps for physically-memory-migrate.
o all benefit as the same as existed "movablecore" & "kernelcore".
o Preparing for movable-node, which is very important for power-saving,
hardware partitioning and high-available-system(hardware fault
management).
(Note, we don't introduce movable-node here.)
Action behavior:
When a memoryblock/memorysection is onlined by "online_movable", the kernel
will not have directly reference to the page of the memoryblock,
thus we can remove that memory any time when needed.
When it is online by "online_kernel", the kernel can use it.
When it is online by "online", the zone type doesn't changed.
Current constraints:
Only the memoryblock which is adjacent to the ZONE_MOVABLE
can be online from ZONE_NORMAL to ZONE_MOVABLE.
[akpm@linux-foundation.org: use min_t, cleanups]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-12-12 04:03:16 +04:00
|
|
|
extern int online_pages(unsigned long, unsigned long, int);
|
2014-10-10 02:26:31 +04:00
|
|
|
extern int test_pages_in_a_zone(unsigned long, unsigned long);
|
2007-10-16 12:26:12 +04:00
|
|
|
extern void __offline_isolated_pages(unsigned long, unsigned long);
|
2007-10-16 12:26:14 +04:00
|
|
|
|
2011-07-26 04:12:05 +04:00
|
|
|
typedef void (*online_page_callback_t)(struct page *page);
|
|
|
|
|
|
|
|
extern int set_online_page_callback(online_page_callback_t callback);
|
|
|
|
extern int restore_online_page_callback(online_page_callback_t callback);
|
|
|
|
|
|
|
|
extern void __online_page_set_limits(struct page *page);
|
|
|
|
extern void __online_page_increment_counters(struct page *page);
|
|
|
|
extern void __online_page_free(struct page *page);
|
|
|
|
|
2013-11-13 03:07:25 +04:00
|
|
|
extern int try_online_node(int nid);
|
|
|
|
|
2010-10-27 01:21:30 +04:00
|
|
|
#ifdef CONFIG_MEMORY_HOTREMOVE
|
|
|
|
extern bool is_pageblock_removable_nolock(struct page *page);
|
2013-02-23 04:32:58 +04:00
|
|
|
extern int arch_remove_memory(u64 start, u64 size);
|
2013-04-30 02:08:22 +04:00
|
|
|
extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
|
|
|
|
unsigned long nr_pages);
|
2010-10-27 01:21:30 +04:00
|
|
|
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
|
|
|
|
2005-10-30 04:16:54 +03:00
|
|
|
/* reasonably generic interface to expand the physical pages in a zone */
|
2009-01-07 01:39:14 +03:00
|
|
|
extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
|
2005-10-30 04:16:54 +03:00
|
|
|
unsigned long nr_pages);
|
2006-06-27 13:53:30 +04:00
|
|
|
|
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
extern int memory_add_physaddr_to_nid(u64 start);
|
|
|
|
#else
|
|
|
|
static inline int memory_add_physaddr_to_nid(u64 start)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2006-06-27 13:53:32 +04:00
|
|
|
#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION
|
|
|
|
/*
|
|
|
|
* For supporting node-hotadd, we have to allocate a new pgdat.
|
|
|
|
*
|
|
|
|
* If an arch has generic style NODE_DATA(),
|
|
|
|
* node_data[nid] = kzalloc() works well. But it depends on the architecture.
|
|
|
|
*
|
|
|
|
* In general, generic_alloc_nodedata() is used.
|
|
|
|
* Now, arch_free_nodedata() is just defined for error path of node_hot_add.
|
|
|
|
*
|
|
|
|
*/
|
2006-06-27 13:53:40 +04:00
|
|
|
extern pg_data_t *arch_alloc_nodedata(int nid);
|
|
|
|
extern void arch_free_nodedata(pg_data_t *pgdat);
|
[PATCH] pgdat allocation and update for ia64 of memory hotplug: update pgdat address array
This is to refresh node_data[] array for ia64. As I mentioned previous
patches, ia64 has copies of information of pgdat address array on each node as
per node data.
At v2 of node_add, this function used stop_machine_run() to update them. (I
wished that they were copied safety as much as possible.) But, in this patch,
this arrays are just copied simply, and set node_online_map bit after
completion of pgdat initialization.
So, kernel must touch NODE_DATA() macro after checking node_online_map().
(Current code has already done it.) This is more simple way for just
hot-add.....
Note : It will be problem when hot-remove will occur,
because, even if online_map bit is set, kernel may
touch NODE_DATA() due to race condition. :-(
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27 13:53:39 +04:00
|
|
|
extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat);
|
2006-06-27 13:53:32 +04:00
|
|
|
|
|
|
|
#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
|
|
|
|
|
|
|
|
#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid)
|
|
|
|
#define arch_free_nodedata(pgdat) generic_free_nodedata(pgdat)
|
|
|
|
|
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
/*
|
|
|
|
* If ARCH_HAS_NODEDATA_EXTENSION=n, this func is used to allocate pgdat.
|
|
|
|
* XXX: kmalloc_node() can't work well to get new node's memory at this time.
|
|
|
|
* Because, pgdat for the new node is not allocated/initialized yet itself.
|
|
|
|
* To use new node's memory, more consideration will be necessary.
|
|
|
|
*/
|
|
|
|
#define generic_alloc_nodedata(nid) \
|
|
|
|
({ \
|
|
|
|
kzalloc(sizeof(pg_data_t), GFP_KERNEL); \
|
|
|
|
})
|
|
|
|
/*
|
|
|
|
* This definition is just for error path in node hotadd.
|
|
|
|
* For node hotremove, we have to replace this.
|
|
|
|
*/
|
|
|
|
#define generic_free_nodedata(pgdat) kfree(pgdat)
|
|
|
|
|
2006-06-27 13:53:33 +04:00
|
|
|
extern pg_data_t *node_data[];
|
|
|
|
static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
|
|
|
|
{
|
|
|
|
node_data[nid] = pgdat;
|
|
|
|
}
|
|
|
|
|
2006-06-27 13:53:32 +04:00
|
|
|
#else /* !CONFIG_NUMA */
|
|
|
|
|
|
|
|
/* never called */
|
|
|
|
static inline pg_data_t *generic_alloc_nodedata(int nid)
|
|
|
|
{
|
|
|
|
BUG();
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
static inline void generic_free_nodedata(pg_data_t *pgdat)
|
|
|
|
{
|
|
|
|
}
|
2006-06-27 13:53:33 +04:00
|
|
|
static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
|
|
|
|
{
|
|
|
|
}
|
2006-06-27 13:53:32 +04:00
|
|
|
#endif /* CONFIG_NUMA */
|
|
|
|
#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
|
|
|
|
|
2013-02-23 04:33:00 +04:00
|
|
|
#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
|
|
|
|
extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
|
|
|
|
#else
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocater. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allcated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 13:13:31 +04:00
|
|
|
static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
#endif
|
2013-02-23 04:33:00 +04:00
|
|
|
extern void put_page_bootmem(struct page *page);
|
|
|
|
extern void get_page_bootmem(unsigned long ingo, struct page *page,
|
|
|
|
unsigned long type);
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocater. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allcated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 13:13:31 +04:00
|
|
|
|
mem-hotplug: implement get/put_online_mems
kmem_cache_{create,destroy,shrink} need to get a stable value of
cpu/node online mask, because they init/destroy/access per-cpu/node
kmem_cache parts, which can be allocated or destroyed on cpu/mem
hotplug. To protect against cpu hotplug, these functions use
{get,put}_online_cpus. However, they do nothing to synchronize with
memory hotplug - taking the slab_mutex does not eliminate the
possibility of race as described in patch 2.
What we need there is something like get_online_cpus, but for memory.
We already have lock_memory_hotplug, which serves for the purpose, but
it's a bit of a hammer right now, because it's backed by a mutex. As a
result, it imposes some limitations to locking order, which are not
desirable, and can't be used just like get_online_cpus. That's why in
patch 1 I substitute it with get/put_online_mems, which work exactly
like get/put_online_cpus except they block not cpu, but memory hotplug.
[ v1 can be found at https://lkml.org/lkml/2014/4/6/68. I NAK'ed it by
myself, because it used an rw semaphore for get/put_online_mems,
making them dead lock prune. ]
This patch (of 2):
{un}lock_memory_hotplug, which is used to synchronize against memory
hotplug, is currently backed by a mutex, which makes it a bit of a
hammer - threads that only want to get a stable value of online nodes
mask won't be able to proceed concurrently. Also, it imposes some
strong locking ordering rules on it, which narrows down the set of its
usage scenarios.
This patch introduces get/put_online_mems, which are the same as
get/put_online_cpus, but for memory hotplug, i.e. executing a code
inside a get/put_online_mems section will guarantee a stable value of
online nodes, present pages, etc.
lock_memory_hotplug()/unlock_memory_hotplug() are removed altogether.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-06-05 03:07:18 +04:00
|
|
|
void get_online_mems(void);
|
|
|
|
void put_online_mems(void);
|
2010-12-03 01:31:19 +03:00
|
|
|
|
2015-04-15 01:45:11 +03:00
|
|
|
void mem_hotplug_begin(void);
|
|
|
|
void mem_hotplug_done(void);
|
|
|
|
|
2005-10-30 04:16:52 +03:00
|
|
|
#else /* ! CONFIG_MEMORY_HOTPLUG */
|
|
|
|
/*
|
|
|
|
* Stub functions for when hotplug is off
|
|
|
|
*/
|
|
|
|
static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
|
|
|
|
static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
|
|
|
|
static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
|
2005-10-30 04:16:53 +03:00
|
|
|
|
|
|
|
static inline unsigned zone_span_seqbegin(struct zone *zone)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
static inline void zone_span_writelock(struct zone *zone) {}
|
|
|
|
static inline void zone_span_writeunlock(struct zone *zone) {}
|
|
|
|
static inline void zone_seqlock_init(struct zone *zone) {}
|
2005-10-30 04:16:54 +03:00
|
|
|
|
|
|
|
static inline int mhp_notimplemented(const char *func)
|
|
|
|
{
|
|
|
|
printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func);
|
|
|
|
dump_stack();
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocater. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allcated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 13:13:31 +04:00
|
|
|
static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2013-11-13 03:07:25 +04:00
|
|
|
static inline int try_online_node(int nid)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
mem-hotplug: implement get/put_online_mems
kmem_cache_{create,destroy,shrink} need to get a stable value of
cpu/node online mask, because they init/destroy/access per-cpu/node
kmem_cache parts, which can be allocated or destroyed on cpu/mem
hotplug. To protect against cpu hotplug, these functions use
{get,put}_online_cpus. However, they do nothing to synchronize with
memory hotplug - taking the slab_mutex does not eliminate the
possibility of race as described in patch 2.
What we need there is something like get_online_cpus, but for memory.
We already have lock_memory_hotplug, which serves for the purpose, but
it's a bit of a hammer right now, because it's backed by a mutex. As a
result, it imposes some limitations to locking order, which are not
desirable, and can't be used just like get_online_cpus. That's why in
patch 1 I substitute it with get/put_online_mems, which work exactly
like get/put_online_cpus except they block not cpu, but memory hotplug.
[ v1 can be found at https://lkml.org/lkml/2014/4/6/68. I NAK'ed it by
myself, because it used an rw semaphore for get/put_online_mems,
making them dead lock prune. ]
This patch (of 2):
{un}lock_memory_hotplug, which is used to synchronize against memory
hotplug, is currently backed by a mutex, which makes it a bit of a
hammer - threads that only want to get a stable value of online nodes
mask won't be able to proceed concurrently. Also, it imposes some
strong locking ordering rules on it, which narrows down the set of its
usage scenarios.
This patch introduces get/put_online_mems, which are the same as
get/put_online_cpus, but for memory hotplug, i.e. executing a code
inside a get/put_online_mems section will guarantee a stable value of
online nodes, present pages, etc.
lock_memory_hotplug()/unlock_memory_hotplug() are removed altogether.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-06-05 03:07:18 +04:00
|
|
|
static inline void get_online_mems(void) {}
|
|
|
|
static inline void put_online_mems(void) {}
|
2010-12-03 01:31:19 +03:00
|
|
|
|
2015-04-15 01:45:11 +03:00
|
|
|
static inline void mem_hotplug_begin(void) {}
|
|
|
|
static inline void mem_hotplug_done(void) {}
|
|
|
|
|
2005-10-30 04:16:53 +03:00
|
|
|
#endif /* ! CONFIG_MEMORY_HOTPLUG */
|
2006-04-07 21:49:15 +04:00
|
|
|
|
2008-07-24 08:28:19 +04:00
|
|
|
#ifdef CONFIG_MEMORY_HOTREMOVE
|
|
|
|
|
|
|
|
extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
|
2013-02-23 04:33:27 +04:00
|
|
|
extern void try_offline_node(int nid);
|
2013-06-02 00:24:07 +04:00
|
|
|
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
|
|
|
|
extern void remove_memory(int nid, u64 start, u64 size);
|
2008-07-24 08:28:19 +04:00
|
|
|
|
|
|
|
#else
|
|
|
|
static inline int is_mem_section_removable(unsigned long pfn,
|
|
|
|
unsigned long nr_pages)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
2013-02-23 04:33:27 +04:00
|
|
|
|
|
|
|
static inline void try_offline_node(int nid) {}
|
2013-06-02 00:24:07 +04:00
|
|
|
|
|
|
|
static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
|
|
|
|
{
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void remove_memory(int nid, u64 start, u64 size) {}
|
2008-07-24 08:28:19 +04:00
|
|
|
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
|
|
|
|
2013-05-08 02:29:49 +04:00
|
|
|
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
|
|
|
|
void *arg, int (*func)(struct memory_block *, void *));
|
2006-06-27 13:53:30 +04:00
|
|
|
extern int add_memory(int nid, u64 start, u64 size);
|
2015-06-25 18:35:49 +03:00
|
|
|
extern int add_memory_resource(int nid, struct resource *resource);
|
2015-08-09 22:29:06 +03:00
|
|
|
extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
|
|
|
|
bool for_device);
|
|
|
|
extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
|
2012-10-09 03:33:58 +04:00
|
|
|
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
|
2013-02-23 04:32:52 +04:00
|
|
|
extern bool is_memblock_offlined(struct memory_block *mem);
|
2013-05-27 14:58:46 +04:00
|
|
|
extern void remove_memory(int nid, u64 start, u64 size);
|
2013-11-13 03:07:42 +04:00
|
|
|
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
|
2016-01-16 03:56:22 +03:00
|
|
|
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
|
|
|
|
unsigned long map_offset);
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocater. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allcated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 13:13:31 +04:00
|
|
|
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
|
|
|
|
unsigned long pnum);
|
2006-04-07 21:49:15 +04:00
|
|
|
|
2005-10-30 04:16:52 +03:00
|
|
|
#endif /* __LINUX_MEMORY_HOTPLUG_H */
|