2007-10-16 12:26:11 +04:00
|
|
|
/*
|
|
|
|
* linux/mm/page_isolation.c
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/page-isolation.h>
|
|
|
|
#include <linux/pageblock-flags.h>
|
2012-08-01 03:43:50 +04:00
|
|
|
#include <linux/memory.h>
|
2013-09-12 01:22:09 +04:00
|
|
|
#include <linux/hugetlb.h>
|
2007-10-16 12:26:11 +04:00
|
|
|
#include "internal.h"
|
|
|
|
|
2012-12-12 04:00:45 +04:00
|
|
|
int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
|
2012-08-01 03:43:50 +04:00
|
|
|
{
|
|
|
|
struct zone *zone;
|
|
|
|
unsigned long flags, pfn;
|
|
|
|
struct memory_isolate_notify arg;
|
|
|
|
int notifier_ret;
|
|
|
|
int ret = -EBUSY;
|
|
|
|
|
|
|
|
zone = page_zone(page);
|
|
|
|
|
|
|
|
spin_lock_irqsave(&zone->lock, flags);
|
|
|
|
|
|
|
|
pfn = page_to_pfn(page);
|
|
|
|
arg.start_pfn = pfn;
|
|
|
|
arg.nr_pages = pageblock_nr_pages;
|
|
|
|
arg.pages_found = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It may be possible to isolate a pageblock even if the
|
|
|
|
* migratetype is not MIGRATE_MOVABLE. The memory isolation
|
|
|
|
* notifier chain is used by balloon drivers to return the
|
|
|
|
* number of pages in a range that are held by the balloon
|
|
|
|
* driver to shrink memory. If all the pages are accounted for
|
|
|
|
* by balloons, are free, or on the LRU, isolation can continue.
|
|
|
|
* Later, for example, when memory hotplug notifier runs, these
|
|
|
|
* pages reported as "can be isolated" should be isolated(freed)
|
|
|
|
* by the balloon driver through the memory notifier chain.
|
|
|
|
*/
|
|
|
|
notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
|
|
|
|
notifier_ret = notifier_to_errno(notifier_ret);
|
|
|
|
if (notifier_ret)
|
|
|
|
goto out;
|
|
|
|
/*
|
|
|
|
* FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
|
|
|
|
* We just check MOVABLE pages.
|
|
|
|
*/
|
2012-12-12 04:00:45 +04:00
|
|
|
if (!has_unmovable_pages(zone, page, arg.pages_found,
|
|
|
|
skip_hwpoisoned_pages))
|
2012-08-01 03:43:50 +04:00
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* immobile means "not-on-lru" paes. If immobile is larger than
|
|
|
|
* removable-by-driver pages reported by notifier, we'll fail.
|
|
|
|
*/
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (!ret) {
|
2012-10-09 03:32:00 +04:00
|
|
|
unsigned long nr_pages;
|
2012-10-09 03:32:02 +04:00
|
|
|
int migratetype = get_pageblock_migratetype(page);
|
2012-10-09 03:32:00 +04:00
|
|
|
|
2013-01-05 03:35:08 +04:00
|
|
|
set_pageblock_migratetype(page, MIGRATE_ISOLATE);
|
mm/page_alloc: fix incorrect isolation behavior by rechecking migratetype
Before describing bugs itself, I first explain definition of freepage.
1. pages on buddy list are counted as freepage.
2. pages on isolate migratetype buddy list are *not* counted as freepage.
3. pages on cma buddy list are counted as CMA freepage, too.
Now, I describe problems and related patch.
Patch 1: There is race conditions on getting pageblock migratetype that
it results in misplacement of freepages on buddy list, incorrect
freepage count and un-availability of freepage.
Patch 2: Freepages on pcp list could have stale cached information to
determine migratetype of buddy list to go. This causes misplacement of
freepages on buddy list and incorrect freepage count.
Patch 4: Merging between freepages on different migratetype of
pageblocks will cause freepages accouting problem. This patch fixes it.
Without patchset [3], above problem doesn't happens on my CMA allocation
test, because CMA reserved pages aren't used at all. So there is no
chance for above race.
With patchset [3], I did simple CMA allocation test and get below
result:
- Virtual machine, 4 cpus, 1024 MB memory, 256 MB CMA reservation
- run kernel build (make -j16) on background
- 30 times CMA allocation(8MB * 30 = 240MB) attempts in 5 sec interval
- Result: more than 5000 freepage count are missed
With patchset [3] and this patchset, I found that no freepage count are
missed so that I conclude that problems are solved.
On my simple memory offlining test, these problems also occur on that
environment, too.
This patch (of 4):
There are two paths to reach core free function of buddy allocator,
__free_one_page(), one is free_one_page()->__free_one_page() and the
other is free_hot_cold_page()->free_pcppages_bulk()->__free_one_page().
Each paths has race condition causing serious problems. At first, this
patch is focused on first type of freepath. And then, following patch
will solve the problem in second type of freepath.
In the first type of freepath, we got migratetype of freeing page
without holding the zone lock, so it could be racy. There are two cases
of this race.
1. pages are added to isolate buddy list after restoring orignal
migratetype
CPU1 CPU2
get migratetype => return MIGRATE_ISOLATE
call free_one_page() with MIGRATE_ISOLATE
grab the zone lock
unisolate pageblock
release the zone lock
grab the zone lock
call __free_one_page() with MIGRATE_ISOLATE
freepage go into isolate buddy list,
although pageblock is already unisolated
This may cause two problems. One is that we can't use this page anymore
until next isolation attempt of this pageblock, because freepage is on
isolate buddy list. The other is that freepage accouting could be wrong
due to merging between different buddy list. Freepages on isolate buddy
list aren't counted as freepage, but ones on normal buddy list are
counted as freepage. If merge happens, buddy freepage on normal buddy
list is inevitably moved to isolate buddy list without any consideration
of freepage accouting so it could be incorrect.
2. pages are added to normal buddy list while pageblock is isolated.
It is similar with above case.
This also may cause two problems. One is that we can't keep these
freepages from being allocated. Although this pageblock is isolated,
freepage would be added to normal buddy list so that it could be
allocated without any restriction. And the other problem is same as
case 1, that it, incorrect freepage accouting.
This race condition would be prevented by checking migratetype again
with holding the zone lock. Because it is somewhat heavy operation and
it isn't needed in common case, we want to avoid rechecking as much as
possible. So this patch introduce new variable, nr_isolate_pageblock in
struct zone to check if there is isolated pageblock. With this, we can
avoid to re-check migratetype in common case and do it only if there is
isolated pageblock or migratetype is MIGRATE_ISOLATE. This solve above
mentioned problems.
Changes from v3:
Add one more check in free_one_page() that checks whether migratetype is
MIGRATE_ISOLATE or not. Without this, abovementioned case 1 could happens.
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Laura Abbott <lauraa@codeaurora.org>
Cc: Heesub Shin <heesub.shin@samsung.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Ritesh Harjani <ritesh.list@gmail.com>
Cc: Gioh Kim <gioh.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-11-14 02:19:11 +03:00
|
|
|
zone->nr_isolate_pageblock++;
|
2012-10-09 03:32:00 +04:00
|
|
|
nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
|
|
|
|
|
2012-10-09 03:32:02 +04:00
|
|
|
__mod_zone_freepage_state(zone, -nr_pages, migratetype);
|
2012-08-01 03:43:50 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&zone->lock, flags);
|
|
|
|
if (!ret)
|
2014-12-11 02:43:04 +03:00
|
|
|
drain_all_pages(zone);
|
2012-08-01 03:43:50 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void unset_migratetype_isolate(struct page *page, unsigned migratetype)
|
|
|
|
{
|
|
|
|
struct zone *zone;
|
2012-10-09 03:32:00 +04:00
|
|
|
unsigned long flags, nr_pages;
|
2014-11-14 02:19:21 +03:00
|
|
|
struct page *isolated_page = NULL;
|
|
|
|
unsigned int order;
|
|
|
|
unsigned long page_idx, buddy_idx;
|
|
|
|
struct page *buddy;
|
2012-10-09 03:32:00 +04:00
|
|
|
|
2012-08-01 03:43:50 +04:00
|
|
|
zone = page_zone(page);
|
|
|
|
spin_lock_irqsave(&zone->lock, flags);
|
|
|
|
if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
|
|
|
|
goto out;
|
2014-11-14 02:19:21 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Because freepage with more than pageblock_order on isolated
|
|
|
|
* pageblock is restricted to merge due to freepage counting problem,
|
|
|
|
* it is possible that there is free buddy page.
|
|
|
|
* move_freepages_block() doesn't care of merge so we need other
|
|
|
|
* approach in order to merge them. Isolation and free will make
|
|
|
|
* these pages to be merged.
|
|
|
|
*/
|
|
|
|
if (PageBuddy(page)) {
|
|
|
|
order = page_order(page);
|
|
|
|
if (order >= pageblock_order) {
|
|
|
|
page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
|
|
|
|
buddy_idx = __find_buddy_index(page_idx, order);
|
|
|
|
buddy = page + (buddy_idx - page_idx);
|
|
|
|
|
2015-05-15 01:17:04 +03:00
|
|
|
if (pfn_valid_within(page_to_pfn(buddy)) &&
|
|
|
|
!is_migrate_isolate_page(buddy)) {
|
2014-11-14 02:19:21 +03:00
|
|
|
__isolate_free_page(page, order);
|
2015-03-26 01:55:26 +03:00
|
|
|
kernel_map_pages(page, (1 << order), 1);
|
2014-11-14 02:19:21 +03:00
|
|
|
set_page_refcounted(page);
|
|
|
|
isolated_page = page;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we isolate freepage with more than pageblock_order, there
|
|
|
|
* should be no freepage in the range, so we could avoid costly
|
|
|
|
* pageblock scanning for freepage moving.
|
|
|
|
*/
|
|
|
|
if (!isolated_page) {
|
|
|
|
nr_pages = move_freepages_block(zone, page, migratetype);
|
|
|
|
__mod_zone_freepage_state(zone, nr_pages, migratetype);
|
|
|
|
}
|
2013-01-05 03:35:08 +04:00
|
|
|
set_pageblock_migratetype(page, migratetype);
|
mm/page_alloc: fix incorrect isolation behavior by rechecking migratetype
Before describing bugs itself, I first explain definition of freepage.
1. pages on buddy list are counted as freepage.
2. pages on isolate migratetype buddy list are *not* counted as freepage.
3. pages on cma buddy list are counted as CMA freepage, too.
Now, I describe problems and related patch.
Patch 1: There is race conditions on getting pageblock migratetype that
it results in misplacement of freepages on buddy list, incorrect
freepage count and un-availability of freepage.
Patch 2: Freepages on pcp list could have stale cached information to
determine migratetype of buddy list to go. This causes misplacement of
freepages on buddy list and incorrect freepage count.
Patch 4: Merging between freepages on different migratetype of
pageblocks will cause freepages accouting problem. This patch fixes it.
Without patchset [3], above problem doesn't happens on my CMA allocation
test, because CMA reserved pages aren't used at all. So there is no
chance for above race.
With patchset [3], I did simple CMA allocation test and get below
result:
- Virtual machine, 4 cpus, 1024 MB memory, 256 MB CMA reservation
- run kernel build (make -j16) on background
- 30 times CMA allocation(8MB * 30 = 240MB) attempts in 5 sec interval
- Result: more than 5000 freepage count are missed
With patchset [3] and this patchset, I found that no freepage count are
missed so that I conclude that problems are solved.
On my simple memory offlining test, these problems also occur on that
environment, too.
This patch (of 4):
There are two paths to reach core free function of buddy allocator,
__free_one_page(), one is free_one_page()->__free_one_page() and the
other is free_hot_cold_page()->free_pcppages_bulk()->__free_one_page().
Each paths has race condition causing serious problems. At first, this
patch is focused on first type of freepath. And then, following patch
will solve the problem in second type of freepath.
In the first type of freepath, we got migratetype of freeing page
without holding the zone lock, so it could be racy. There are two cases
of this race.
1. pages are added to isolate buddy list after restoring orignal
migratetype
CPU1 CPU2
get migratetype => return MIGRATE_ISOLATE
call free_one_page() with MIGRATE_ISOLATE
grab the zone lock
unisolate pageblock
release the zone lock
grab the zone lock
call __free_one_page() with MIGRATE_ISOLATE
freepage go into isolate buddy list,
although pageblock is already unisolated
This may cause two problems. One is that we can't use this page anymore
until next isolation attempt of this pageblock, because freepage is on
isolate buddy list. The other is that freepage accouting could be wrong
due to merging between different buddy list. Freepages on isolate buddy
list aren't counted as freepage, but ones on normal buddy list are
counted as freepage. If merge happens, buddy freepage on normal buddy
list is inevitably moved to isolate buddy list without any consideration
of freepage accouting so it could be incorrect.
2. pages are added to normal buddy list while pageblock is isolated.
It is similar with above case.
This also may cause two problems. One is that we can't keep these
freepages from being allocated. Although this pageblock is isolated,
freepage would be added to normal buddy list so that it could be
allocated without any restriction. And the other problem is same as
case 1, that it, incorrect freepage accouting.
This race condition would be prevented by checking migratetype again
with holding the zone lock. Because it is somewhat heavy operation and
it isn't needed in common case, we want to avoid rechecking as much as
possible. So this patch introduce new variable, nr_isolate_pageblock in
struct zone to check if there is isolated pageblock. With this, we can
avoid to re-check migratetype in common case and do it only if there is
isolated pageblock or migratetype is MIGRATE_ISOLATE. This solve above
mentioned problems.
Changes from v3:
Add one more check in free_one_page() that checks whether migratetype is
MIGRATE_ISOLATE or not. Without this, abovementioned case 1 could happens.
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Laura Abbott <lauraa@codeaurora.org>
Cc: Heesub Shin <heesub.shin@samsung.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Ritesh Harjani <ritesh.list@gmail.com>
Cc: Gioh Kim <gioh.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-11-14 02:19:11 +03:00
|
|
|
zone->nr_isolate_pageblock--;
|
2012-08-01 03:43:50 +04:00
|
|
|
out:
|
|
|
|
spin_unlock_irqrestore(&zone->lock, flags);
|
2014-11-14 02:19:21 +03:00
|
|
|
if (isolated_page)
|
|
|
|
__free_pages(isolated_page, order);
|
2012-08-01 03:43:50 +04:00
|
|
|
}
|
|
|
|
|
2007-10-16 12:26:11 +04:00
|
|
|
static inline struct page *
|
|
|
|
__first_valid_page(unsigned long pfn, unsigned long nr_pages)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < nr_pages; i++)
|
|
|
|
if (pfn_valid_within(pfn + i))
|
|
|
|
break;
|
|
|
|
if (unlikely(i == nr_pages))
|
|
|
|
return NULL;
|
|
|
|
return pfn_to_page(pfn + i);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* start_isolate_page_range() -- make page-allocation-type of range of pages
|
|
|
|
* to be MIGRATE_ISOLATE.
|
|
|
|
* @start_pfn: The lower PFN of the range to be isolated.
|
|
|
|
* @end_pfn: The upper PFN of the range to be isolated.
|
2012-04-03 17:06:15 +04:00
|
|
|
* @migratetype: migrate type to set in error recovery.
|
2007-10-16 12:26:11 +04:00
|
|
|
*
|
|
|
|
* Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
|
|
|
|
* the range will never be allocated. Any free pages and pages freed in the
|
|
|
|
* future will not be allocated again.
|
|
|
|
*
|
|
|
|
* start_pfn/end_pfn must be aligned to pageblock_order.
|
|
|
|
* Returns 0 on success and -EBUSY if any part of range cannot be isolated.
|
|
|
|
*/
|
2012-04-03 17:06:15 +04:00
|
|
|
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
|
2012-12-12 04:00:45 +04:00
|
|
|
unsigned migratetype, bool skip_hwpoisoned_pages)
|
2007-10-16 12:26:11 +04:00
|
|
|
{
|
|
|
|
unsigned long pfn;
|
|
|
|
unsigned long undo_pfn;
|
|
|
|
struct page *page;
|
|
|
|
|
|
|
|
BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
|
|
|
|
for (pfn = start_pfn;
|
|
|
|
pfn < end_pfn;
|
|
|
|
pfn += pageblock_nr_pages) {
|
|
|
|
page = __first_valid_page(pfn, pageblock_nr_pages);
|
2012-12-12 04:00:45 +04:00
|
|
|
if (page &&
|
|
|
|
set_migratetype_isolate(page, skip_hwpoisoned_pages)) {
|
2007-10-16 12:26:11 +04:00
|
|
|
undo_pfn = pfn;
|
|
|
|
goto undo;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
undo:
|
|
|
|
for (pfn = start_pfn;
|
2007-11-15 03:59:12 +03:00
|
|
|
pfn < undo_pfn;
|
2007-10-16 12:26:11 +04:00
|
|
|
pfn += pageblock_nr_pages)
|
2012-04-03 17:06:15 +04:00
|
|
|
unset_migratetype_isolate(pfn_to_page(pfn), migratetype);
|
2007-10-16 12:26:11 +04:00
|
|
|
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make isolated pages available again.
|
|
|
|
*/
|
2012-04-03 17:06:15 +04:00
|
|
|
int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
|
|
|
|
unsigned migratetype)
|
2007-10-16 12:26:11 +04:00
|
|
|
{
|
|
|
|
unsigned long pfn;
|
|
|
|
struct page *page;
|
|
|
|
BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
for (pfn = start_pfn;
|
|
|
|
pfn < end_pfn;
|
|
|
|
pfn += pageblock_nr_pages) {
|
|
|
|
page = __first_valid_page(pfn, pageblock_nr_pages);
|
2007-11-15 03:59:12 +03:00
|
|
|
if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
|
2007-10-16 12:26:11 +04:00
|
|
|
continue;
|
2012-04-03 17:06:15 +04:00
|
|
|
unset_migratetype_isolate(page, migratetype);
|
2007-10-16 12:26:11 +04:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Test all pages in the range is free(means isolated) or not.
|
|
|
|
* all pages in [start_pfn...end_pfn) must be in the same zone.
|
|
|
|
* zone->lock must be held before call this.
|
|
|
|
*
|
2012-04-03 17:06:15 +04:00
|
|
|
* Returns 1 if all pages in the range are isolated.
|
2007-10-16 12:26:11 +04:00
|
|
|
*/
|
|
|
|
static int
|
2012-12-12 04:00:45 +04:00
|
|
|
__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
|
|
|
|
bool skip_hwpoisoned_pages)
|
2007-10-16 12:26:11 +04:00
|
|
|
{
|
|
|
|
struct page *page;
|
|
|
|
|
|
|
|
while (pfn < end_pfn) {
|
|
|
|
if (!pfn_valid_within(pfn)) {
|
|
|
|
pfn++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
page = pfn_to_page(pfn);
|
2012-10-09 03:32:14 +04:00
|
|
|
if (PageBuddy(page)) {
|
2012-10-09 03:32:16 +04:00
|
|
|
/*
|
|
|
|
* If race between isolatation and allocation happens,
|
|
|
|
* some free pages could be in MIGRATE_MOVABLE list
|
|
|
|
* although pageblock's migratation type of the page
|
|
|
|
* is MIGRATE_ISOLATE. Catch it and move the page into
|
|
|
|
* MIGRATE_ISOLATE list.
|
|
|
|
*/
|
|
|
|
if (get_freepage_migratetype(page) != MIGRATE_ISOLATE) {
|
|
|
|
struct page *end_page;
|
|
|
|
|
|
|
|
end_page = page + (1 << page_order(page)) - 1;
|
|
|
|
move_freepages(page_zone(page), page, end_page,
|
|
|
|
MIGRATE_ISOLATE);
|
|
|
|
}
|
2007-10-16 12:26:11 +04:00
|
|
|
pfn += 1 << page_order(page);
|
2012-10-09 03:32:14 +04:00
|
|
|
}
|
2007-10-16 12:26:11 +04:00
|
|
|
else if (page_count(page) == 0 &&
|
2012-10-09 03:32:08 +04:00
|
|
|
get_freepage_migratetype(page) == MIGRATE_ISOLATE)
|
2007-10-16 12:26:11 +04:00
|
|
|
pfn += 1;
|
2012-12-12 04:00:45 +04:00
|
|
|
else if (skip_hwpoisoned_pages && PageHWPoison(page)) {
|
|
|
|
/*
|
|
|
|
* The HWPoisoned page may be not in buddy
|
|
|
|
* system, and page_count() is not 0.
|
|
|
|
*/
|
|
|
|
pfn++;
|
|
|
|
continue;
|
|
|
|
}
|
2007-10-16 12:26:11 +04:00
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (pfn < end_pfn)
|
|
|
|
return 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2012-12-12 04:00:45 +04:00
|
|
|
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
|
|
|
|
bool skip_hwpoisoned_pages)
|
2007-10-16 12:26:11 +04:00
|
|
|
{
|
memory hotplug: missing zone->lock in test_pages_isolated()
__test_page_isolated_in_pageblock() in mm/page_isolation.c has a comment
saying that the caller must hold zone->lock. But the only caller of that
function, test_pages_isolated(), does not hold zone->lock and the lock is
also not acquired anywhere before. This patch adds the missing zone->lock
to test_pages_isolated().
We reproducibly run into BUG_ON(!PageBuddy(page)) in __offline_isolated_pages()
during memory hotplug stress test, see trace below. This patch fixes that
problem, it would be good if we could have it in 2.6.27.
kernel BUG at /home/autobuild/BUILD/linux-2.6.26-20080909/mm/page_alloc.c:4561!
illegal operation: 0001 [#1] PREEMPT SMP
Modules linked in: dm_multipath sunrpc bonding qeth_l3 dm_mod qeth ccwgroup vmur
CPU: 1 Not tainted 2.6.26-29.x.20080909-s390default #1
Process memory_loop_all (pid: 10025, task: 2f444028, ksp: 2b10dd28)
Krnl PSW : 040c0000 801727ea (__offline_isolated_pages+0x18e/0x1c4)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0
Krnl GPRS: 00000000 7e27fc00 00000000 7e27fc00
00000000 00000400 00014000 7e27fc01
00606f00 7e27fc00 00013fe0 2b10dd28
00000005 80172662 801727b2 2b10dd28
Krnl Code: 801727de: 5810900c l %r1,12(%r9)
801727e2: a7f4ffb3 brc 15,80172748
801727e6: a7f40001 brc 15,801727e8
>801727ea: a7f4ffbc brc 15,80172762
801727ee: a7f40001 brc 15,801727f0
801727f2: a7f4ffaf brc 15,80172750
801727f6: 0707 bcr 0,%r7
801727f8: 0017 unknown
Call Trace:
([<0000000000172772>] __offline_isolated_pages+0x116/0x1c4)
[<00000000001953a2>] offline_isolated_pages_cb+0x22/0x34
[<000000000013164c>] walk_memory_resource+0xcc/0x11c
[<000000000019520e>] offline_pages+0x36a/0x498
[<00000000001004d6>] remove_memory+0x36/0x44
[<000000000028fb06>] memory_block_change_state+0x112/0x150
[<000000000028ffb8>] store_mem_state+0x90/0xe4
[<0000000000289c00>] sysdev_store+0x34/0x40
[<00000000001ee048>] sysfs_write_file+0xd0/0x178
[<000000000019b1a8>] vfs_write+0x74/0x118
[<000000000019b9ae>] sys_write+0x46/0x7c
[<000000000011160e>] sysc_do_restart+0x12/0x16
[<0000000077f3e8ca>] 0x77f3e8ca
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-03 01:50:16 +04:00
|
|
|
unsigned long pfn, flags;
|
2007-10-16 12:26:11 +04:00
|
|
|
struct page *page;
|
memory hotplug: missing zone->lock in test_pages_isolated()
__test_page_isolated_in_pageblock() in mm/page_isolation.c has a comment
saying that the caller must hold zone->lock. But the only caller of that
function, test_pages_isolated(), does not hold zone->lock and the lock is
also not acquired anywhere before. This patch adds the missing zone->lock
to test_pages_isolated().
We reproducibly run into BUG_ON(!PageBuddy(page)) in __offline_isolated_pages()
during memory hotplug stress test, see trace below. This patch fixes that
problem, it would be good if we could have it in 2.6.27.
kernel BUG at /home/autobuild/BUILD/linux-2.6.26-20080909/mm/page_alloc.c:4561!
illegal operation: 0001 [#1] PREEMPT SMP
Modules linked in: dm_multipath sunrpc bonding qeth_l3 dm_mod qeth ccwgroup vmur
CPU: 1 Not tainted 2.6.26-29.x.20080909-s390default #1
Process memory_loop_all (pid: 10025, task: 2f444028, ksp: 2b10dd28)
Krnl PSW : 040c0000 801727ea (__offline_isolated_pages+0x18e/0x1c4)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0
Krnl GPRS: 00000000 7e27fc00 00000000 7e27fc00
00000000 00000400 00014000 7e27fc01
00606f00 7e27fc00 00013fe0 2b10dd28
00000005 80172662 801727b2 2b10dd28
Krnl Code: 801727de: 5810900c l %r1,12(%r9)
801727e2: a7f4ffb3 brc 15,80172748
801727e6: a7f40001 brc 15,801727e8
>801727ea: a7f4ffbc brc 15,80172762
801727ee: a7f40001 brc 15,801727f0
801727f2: a7f4ffaf brc 15,80172750
801727f6: 0707 bcr 0,%r7
801727f8: 0017 unknown
Call Trace:
([<0000000000172772>] __offline_isolated_pages+0x116/0x1c4)
[<00000000001953a2>] offline_isolated_pages_cb+0x22/0x34
[<000000000013164c>] walk_memory_resource+0xcc/0x11c
[<000000000019520e>] offline_pages+0x36a/0x498
[<00000000001004d6>] remove_memory+0x36/0x44
[<000000000028fb06>] memory_block_change_state+0x112/0x150
[<000000000028ffb8>] store_mem_state+0x90/0xe4
[<0000000000289c00>] sysdev_store+0x34/0x40
[<00000000001ee048>] sysfs_write_file+0xd0/0x178
[<000000000019b1a8>] vfs_write+0x74/0x118
[<000000000019b9ae>] sys_write+0x46/0x7c
[<000000000011160e>] sysc_do_restart+0x12/0x16
[<0000000077f3e8ca>] 0x77f3e8ca
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-03 01:50:16 +04:00
|
|
|
struct zone *zone;
|
|
|
|
int ret;
|
2007-10-16 12:26:11 +04:00
|
|
|
|
|
|
|
/*
|
2013-06-20 14:10:19 +04:00
|
|
|
* Note: pageblock_nr_pages != MAX_ORDER. Then, chunks of free pages
|
|
|
|
* are not aligned to pageblock_nr_pages.
|
|
|
|
* Then we just check migratetype first.
|
2007-10-16 12:26:11 +04:00
|
|
|
*/
|
|
|
|
for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
|
|
|
|
page = __first_valid_page(pfn, pageblock_nr_pages);
|
2007-11-15 03:59:12 +03:00
|
|
|
if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
|
2007-10-16 12:26:11 +04:00
|
|
|
break;
|
|
|
|
}
|
2008-11-06 23:53:36 +03:00
|
|
|
page = __first_valid_page(start_pfn, end_pfn - start_pfn);
|
|
|
|
if ((pfn < end_pfn) || !page)
|
2007-10-16 12:26:11 +04:00
|
|
|
return -EBUSY;
|
2013-06-20 14:10:19 +04:00
|
|
|
/* Check all pages are free or marked as ISOLATED */
|
2008-11-06 23:53:36 +03:00
|
|
|
zone = page_zone(page);
|
memory hotplug: missing zone->lock in test_pages_isolated()
__test_page_isolated_in_pageblock() in mm/page_isolation.c has a comment
saying that the caller must hold zone->lock. But the only caller of that
function, test_pages_isolated(), does not hold zone->lock and the lock is
also not acquired anywhere before. This patch adds the missing zone->lock
to test_pages_isolated().
We reproducibly run into BUG_ON(!PageBuddy(page)) in __offline_isolated_pages()
during memory hotplug stress test, see trace below. This patch fixes that
problem, it would be good if we could have it in 2.6.27.
kernel BUG at /home/autobuild/BUILD/linux-2.6.26-20080909/mm/page_alloc.c:4561!
illegal operation: 0001 [#1] PREEMPT SMP
Modules linked in: dm_multipath sunrpc bonding qeth_l3 dm_mod qeth ccwgroup vmur
CPU: 1 Not tainted 2.6.26-29.x.20080909-s390default #1
Process memory_loop_all (pid: 10025, task: 2f444028, ksp: 2b10dd28)
Krnl PSW : 040c0000 801727ea (__offline_isolated_pages+0x18e/0x1c4)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0
Krnl GPRS: 00000000 7e27fc00 00000000 7e27fc00
00000000 00000400 00014000 7e27fc01
00606f00 7e27fc00 00013fe0 2b10dd28
00000005 80172662 801727b2 2b10dd28
Krnl Code: 801727de: 5810900c l %r1,12(%r9)
801727e2: a7f4ffb3 brc 15,80172748
801727e6: a7f40001 brc 15,801727e8
>801727ea: a7f4ffbc brc 15,80172762
801727ee: a7f40001 brc 15,801727f0
801727f2: a7f4ffaf brc 15,80172750
801727f6: 0707 bcr 0,%r7
801727f8: 0017 unknown
Call Trace:
([<0000000000172772>] __offline_isolated_pages+0x116/0x1c4)
[<00000000001953a2>] offline_isolated_pages_cb+0x22/0x34
[<000000000013164c>] walk_memory_resource+0xcc/0x11c
[<000000000019520e>] offline_pages+0x36a/0x498
[<00000000001004d6>] remove_memory+0x36/0x44
[<000000000028fb06>] memory_block_change_state+0x112/0x150
[<000000000028ffb8>] store_mem_state+0x90/0xe4
[<0000000000289c00>] sysdev_store+0x34/0x40
[<00000000001ee048>] sysfs_write_file+0xd0/0x178
[<000000000019b1a8>] vfs_write+0x74/0x118
[<000000000019b9ae>] sys_write+0x46/0x7c
[<000000000011160e>] sysc_do_restart+0x12/0x16
[<0000000077f3e8ca>] 0x77f3e8ca
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-03 01:50:16 +04:00
|
|
|
spin_lock_irqsave(&zone->lock, flags);
|
2012-12-12 04:00:45 +04:00
|
|
|
ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn,
|
|
|
|
skip_hwpoisoned_pages);
|
memory hotplug: missing zone->lock in test_pages_isolated()
__test_page_isolated_in_pageblock() in mm/page_isolation.c has a comment
saying that the caller must hold zone->lock. But the only caller of that
function, test_pages_isolated(), does not hold zone->lock and the lock is
also not acquired anywhere before. This patch adds the missing zone->lock
to test_pages_isolated().
We reproducibly run into BUG_ON(!PageBuddy(page)) in __offline_isolated_pages()
during memory hotplug stress test, see trace below. This patch fixes that
problem, it would be good if we could have it in 2.6.27.
kernel BUG at /home/autobuild/BUILD/linux-2.6.26-20080909/mm/page_alloc.c:4561!
illegal operation: 0001 [#1] PREEMPT SMP
Modules linked in: dm_multipath sunrpc bonding qeth_l3 dm_mod qeth ccwgroup vmur
CPU: 1 Not tainted 2.6.26-29.x.20080909-s390default #1
Process memory_loop_all (pid: 10025, task: 2f444028, ksp: 2b10dd28)
Krnl PSW : 040c0000 801727ea (__offline_isolated_pages+0x18e/0x1c4)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0
Krnl GPRS: 00000000 7e27fc00 00000000 7e27fc00
00000000 00000400 00014000 7e27fc01
00606f00 7e27fc00 00013fe0 2b10dd28
00000005 80172662 801727b2 2b10dd28
Krnl Code: 801727de: 5810900c l %r1,12(%r9)
801727e2: a7f4ffb3 brc 15,80172748
801727e6: a7f40001 brc 15,801727e8
>801727ea: a7f4ffbc brc 15,80172762
801727ee: a7f40001 brc 15,801727f0
801727f2: a7f4ffaf brc 15,80172750
801727f6: 0707 bcr 0,%r7
801727f8: 0017 unknown
Call Trace:
([<0000000000172772>] __offline_isolated_pages+0x116/0x1c4)
[<00000000001953a2>] offline_isolated_pages_cb+0x22/0x34
[<000000000013164c>] walk_memory_resource+0xcc/0x11c
[<000000000019520e>] offline_pages+0x36a/0x498
[<00000000001004d6>] remove_memory+0x36/0x44
[<000000000028fb06>] memory_block_change_state+0x112/0x150
[<000000000028ffb8>] store_mem_state+0x90/0xe4
[<0000000000289c00>] sysdev_store+0x34/0x40
[<00000000001ee048>] sysfs_write_file+0xd0/0x178
[<000000000019b1a8>] vfs_write+0x74/0x118
[<000000000019b9ae>] sys_write+0x46/0x7c
[<000000000011160e>] sysc_do_restart+0x12/0x16
[<0000000077f3e8ca>] 0x77f3e8ca
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-03 01:50:16 +04:00
|
|
|
spin_unlock_irqrestore(&zone->lock, flags);
|
|
|
|
return ret ? 0 : -EBUSY;
|
2007-10-16 12:26:11 +04:00
|
|
|
}
|
2012-10-09 03:32:52 +04:00
|
|
|
|
|
|
|
struct page *alloc_migrate_target(struct page *page, unsigned long private,
|
|
|
|
int **resultp)
|
|
|
|
{
|
|
|
|
gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
|
|
|
|
|
2013-09-12 01:22:09 +04:00
|
|
|
/*
|
|
|
|
* TODO: allocate a destination hugepage from a nearest neighbor node,
|
|
|
|
* accordance with memory policy of the user process if possible. For
|
|
|
|
* now as a simple work-around, we use the next node for destination.
|
|
|
|
*/
|
|
|
|
if (PageHuge(page)) {
|
|
|
|
nodemask_t src = nodemask_of_node(page_to_nid(page));
|
|
|
|
nodemask_t dst;
|
|
|
|
nodes_complement(dst, src);
|
|
|
|
return alloc_huge_page_node(page_hstate(compound_head(page)),
|
|
|
|
next_node(page_to_nid(page), dst));
|
|
|
|
}
|
|
|
|
|
2012-10-09 03:32:52 +04:00
|
|
|
if (PageHighMem(page))
|
|
|
|
gfp_mask |= __GFP_HIGHMEM;
|
|
|
|
|
|
|
|
return alloc_page(gfp_mask);
|
|
|
|
}
|