mm/hwpoison: introduce per-memory_block hwpoison counter

Currently the PageHWPoison flag does not behave well across memory
hotremove/hotplug.  Any data field in struct page is unreliable when the
associated memory is offlined, and the current mechanism can't tell
whether a memory block is being onlined because a new memory device was
installed or because a previously failed offline operation is being undone.
In particular, when the block contains hwpoisoned memory, it's unclear
what the best option is.

So introduce a new mechanism that makes struct memory_block remember that
the memory block has hwpoisoned memory inside it, and make any online event
fail if the memory block being onlined contains hwpoisoned memory.  struct
memory_block is freed and reallocated over ACPI-based hotremove/hotplug, but
not over sysfs-based hotremove/hotplug, so the new counter can distinguish
these cases.

Link: https://lkml.kernel.org/r/20221024062012.1520887-5-naoya.horiguchi@linux.dev
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Naoya Horiguchi 2022-10-24 15:20:12 +09:00 коммит произвёл Andrew Morton
Родитель a46c9304b4
Коммит 5033091de8
6 изменённых файлов: 71 добавлений и 36 удалений

Просмотреть файл

@ -175,6 +175,15 @@ int memory_notify(unsigned long val, void *v)
return blocking_notifier_call_chain(&memory_chain, val, v); return blocking_notifier_call_chain(&memory_chain, val, v);
} }
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
static unsigned long memblk_nr_poison(struct memory_block *mem);
#else
/*
 * Stub used when CONFIG_MEMORY_FAILURE and CONFIG_MEMORY_HOTPLUG are not
 * both enabled: always reports zero hwpoisoned pages for @mem.
 */
static inline unsigned long memblk_nr_poison(struct memory_block *mem)
{
return 0;
}
#endif
static int memory_block_online(struct memory_block *mem) static int memory_block_online(struct memory_block *mem)
{ {
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
@ -183,6 +192,9 @@ static int memory_block_online(struct memory_block *mem)
struct zone *zone; struct zone *zone;
int ret; int ret;
if (memblk_nr_poison(mem))
return -EHWPOISON;
zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group, zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
start_pfn, nr_pages); start_pfn, nr_pages);
@ -864,6 +876,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
mem = find_memory_block_by_id(block_id); mem = find_memory_block_by_id(block_id);
if (WARN_ON_ONCE(!mem)) if (WARN_ON_ONCE(!mem))
continue; continue;
num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem));
unregister_memory_block_under_nodes(mem); unregister_memory_block_under_nodes(mem);
remove_memory_block(mem); remove_memory_block(mem);
} }
@ -1164,3 +1177,28 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
} }
return ret; return ret;
} }
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
/*
 * Increment the hwpoison counter of the memory block that covers @pfn.
 * Does nothing if no memory block is found for the pfn's block id.
 *
 * NOTE(review): find_memory_block_by_id() is defined outside this view; if
 * it takes a reference on the returned block, nothing here drops it --
 * confirm against its definition.
 */
void memblk_nr_poison_inc(unsigned long pfn)
{
	struct memory_block *blk;

	blk = find_memory_block_by_id(pfn_to_block_id(pfn));
	if (!blk)
		return;

	atomic_long_inc(&blk->nr_hwpoison);
}
/*
 * Subtract @i from the hwpoison counter of the memory block that covers
 * @pfn.  Does nothing if no memory block is found for the pfn's block id.
 *
 * NOTE(review): find_memory_block_by_id() is defined outside this view; if
 * it takes a reference on the returned block, nothing here drops it --
 * confirm against its definition.
 */
void memblk_nr_poison_sub(unsigned long pfn, long i)
{
	struct memory_block *blk;

	blk = find_memory_block_by_id(pfn_to_block_id(pfn));
	if (!blk)
		return;

	atomic_long_sub(i, &blk->nr_hwpoison);
}
/*
 * Return the current value of @mem's hwpoison counter.
 */
static unsigned long memblk_nr_poison(struct memory_block *mem)
{
	unsigned long nr_poisoned = atomic_long_read(&mem->nr_hwpoison);

	return nr_poisoned;
}
#endif

Просмотреть файл

@ -84,6 +84,9 @@ struct memory_block {
unsigned long nr_vmemmap_pages; unsigned long nr_vmemmap_pages;
struct memory_group *group; /* group (if any) for this block */ struct memory_group *group; /* group (if any) for this block */
struct list_head group_next; /* next block inside memory group */ struct list_head group_next; /* next block inside memory group */
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
atomic_long_t nr_hwpoison;
#endif
}; };
int arch_get_memory_phys_device(unsigned long start_pfn); int arch_get_memory_phys_device(unsigned long start_pfn);

Просмотреть файл

@ -3279,7 +3279,8 @@ extern int soft_offline_page(unsigned long pfn, int flags);
#ifdef CONFIG_MEMORY_FAILURE #ifdef CONFIG_MEMORY_FAILURE
extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared); bool *migratable_cleared);
extern void num_poisoned_pages_inc(unsigned long pfn); void num_poisoned_pages_inc(unsigned long pfn);
void num_poisoned_pages_sub(unsigned long pfn, long i);
#else #else
static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared) bool *migratable_cleared)
@ -3290,6 +3291,23 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
static inline void num_poisoned_pages_inc(unsigned long pfn) static inline void num_poisoned_pages_inc(unsigned long pfn)
{ {
} }
/* No-op stub for builds without CONFIG_MEMORY_FAILURE. */
static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
{
}
#endif
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
extern void memblk_nr_poison_inc(unsigned long pfn);
extern void memblk_nr_poison_sub(unsigned long pfn, long i);
#else
/*
 * No-op stub used unless both CONFIG_MEMORY_FAILURE and
 * CONFIG_MEMORY_HOTPLUG are enabled.
 */
static inline void memblk_nr_poison_inc(unsigned long pfn)
{
}
/*
 * No-op stub used unless both CONFIG_MEMORY_FAILURE and
 * CONFIG_MEMORY_HOTPLUG are enabled.
 */
static inline void memblk_nr_poison_sub(unsigned long pfn, long i)
{
}
#endif #endif
#ifndef arch_memory_failure #ifndef arch_memory_failure

Просмотреть файл

@ -708,14 +708,6 @@ extern u64 hwpoison_filter_flags_value;
extern u64 hwpoison_filter_memcg; extern u64 hwpoison_filter_memcg;
extern u32 hwpoison_filter_enable; extern u32 hwpoison_filter_enable;
#ifdef CONFIG_MEMORY_FAILURE
void clear_hwpoisoned_pages(struct page *memmap, int nr_pages);
#else
static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
}
#endif
extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long,
unsigned long, unsigned long, unsigned long, unsigned long,
unsigned long, unsigned long); unsigned long, unsigned long);

Просмотреть файл

@ -77,11 +77,14 @@ static bool hw_memory_failure __read_mostly = false;
inline void num_poisoned_pages_inc(unsigned long pfn) inline void num_poisoned_pages_inc(unsigned long pfn)
{ {
atomic_long_inc(&num_poisoned_pages); atomic_long_inc(&num_poisoned_pages);
memblk_nr_poison_inc(pfn);
} }
static inline void num_poisoned_pages_sub(unsigned long pfn, long i) inline void num_poisoned_pages_sub(unsigned long pfn, long i)
{ {
atomic_long_sub(i, &num_poisoned_pages); atomic_long_sub(i, &num_poisoned_pages);
if (pfn != -1UL)
memblk_nr_poison_sub(pfn, i);
} }
/* /*
@ -1706,6 +1709,8 @@ static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)
if (move_flag) if (move_flag)
SetPageHWPoison(p->page); SetPageHWPoison(p->page);
else
num_poisoned_pages_sub(page_to_pfn(p->page), 1);
kfree(p); kfree(p);
count++; count++;
} }
@ -2332,6 +2337,7 @@ int unpoison_memory(unsigned long pfn)
int ret = -EBUSY; int ret = -EBUSY;
int freeit = 0; int freeit = 0;
unsigned long count = 1; unsigned long count = 1;
bool huge = false;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST); DEFAULT_RATELIMIT_BURST);
@ -2380,6 +2386,7 @@ int unpoison_memory(unsigned long pfn)
ret = get_hwpoison_page(p, MF_UNPOISON); ret = get_hwpoison_page(p, MF_UNPOISON);
if (!ret) { if (!ret) {
if (PageHuge(p)) { if (PageHuge(p)) {
huge = true;
count = free_raw_hwp_pages(page, false); count = free_raw_hwp_pages(page, false);
if (count == 0) { if (count == 0) {
ret = -EBUSY; ret = -EBUSY;
@ -2395,6 +2402,7 @@ int unpoison_memory(unsigned long pfn)
pfn, &unpoison_rs); pfn, &unpoison_rs);
} else { } else {
if (PageHuge(p)) { if (PageHuge(p)) {
huge = true;
count = free_raw_hwp_pages(page, false); count = free_raw_hwp_pages(page, false);
if (count == 0) { if (count == 0) {
ret = -EBUSY; ret = -EBUSY;
@ -2414,7 +2422,8 @@ int unpoison_memory(unsigned long pfn)
unlock_mutex: unlock_mutex:
mutex_unlock(&mf_mutex); mutex_unlock(&mf_mutex);
if (!ret || freeit) { if (!ret || freeit) {
num_poisoned_pages_sub(pfn, count); if (!huge)
num_poisoned_pages_sub(pfn, 1);
unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
page_to_pfn(p), &unpoison_rs); page_to_pfn(p), &unpoison_rs);
} }
@ -2609,26 +2618,3 @@ retry:
return ret; return ret;
} }
void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
int i, total = 0;
/*
* A further optimization is to have per section refcounted
* num_poisoned_pages. But that would need more space per memmap, so
* for now just do a quick global check to speed up this routine in the
* absence of bad pages.
*/
if (atomic_long_read(&num_poisoned_pages) == 0)
return;
for (i = 0; i < nr_pages; i++) {
if (PageHWPoison(&memmap[i])) {
total++;
ClearPageHWPoison(&memmap[i]);
}
}
if (total)
num_poisoned_pages_sub(0, total);
}

Просмотреть файл

@ -926,8 +926,6 @@ void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
unsigned long nr_pages, unsigned long map_offset, unsigned long nr_pages, unsigned long map_offset,
struct vmem_altmap *altmap) struct vmem_altmap *altmap)
{ {
clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
nr_pages - map_offset);
section_deactivate(pfn, nr_pages, altmap); section_deactivate(pfn, nr_pages, altmap);
} }
#endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* CONFIG_MEMORY_HOTPLUG */