mm: Make compound_pincount always available

Move compound_pincount from the third page to the second page, which
means it's available for all compound pages.  That lets us delete
hpage_pincount_available().

On 32-bit systems, there isn't enough space for both compound_pincount
and compound_nr in the second page (it would collide with page->private,
which is in use for pages in the swap cache), so revert the optimisation
of storing both compound_order and compound_nr on 32-bit systems.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Author: Matthew Wilcox (Oracle), 2022-01-06 16:46:43 -05:00
Parent: 6315d8a23c
Commit: 5232c63f46
8 changed files with 42 additions and 51 deletions

Documentation/core-api/pin_user_pages.rst

@@ -55,18 +55,18 @@ flags the caller provides. The caller is required to pass in a non-null struct
 pages* array, and the function then pins pages by incrementing each by a special
 value: GUP_PIN_COUNTING_BIAS.
 
-For huge pages (and in fact, any compound page of more than 2 pages), the
-GUP_PIN_COUNTING_BIAS scheme is not used. Instead, an exact form of pin counting
-is achieved, by using the 3rd struct page in the compound page. A new struct
-page field, hpage_pinned_refcount, has been added in order to support this.
+For compound pages, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead,
+an exact form of pin counting is achieved, by using the 2nd struct page
+in the compound page. A new struct page field, compound_pincount, has
+been added in order to support this.
 
 This approach for compound pages avoids the counting upper limit problems that
 are discussed below. Those limitations would have been aggravated severely by
 huge pages, because each tail page adds a refcount to the head page. And in
-fact, testing revealed that, without a separate hpage_pinned_refcount field,
+fact, testing revealed that, without a separate compound_pincount field,
 page overflows were seen in some huge page stress tests.
 
-This also means that huge pages and compound pages (of order > 1) do not suffer
+This also means that huge pages and compound pages do not suffer
 from the false positives problem that is mentioned below.::
 
         Function
@@ -264,9 +264,9 @@ place.)
 Other diagnostics
 =================
 
-dump_page() has been enhanced slightly, to handle these new counting fields, and
-to better report on compound pages in general. Specifically, for compound pages
-with order > 1, the exact (hpage_pinned_refcount) pincount is reported.
+dump_page() has been enhanced slightly, to handle these new counting
+fields, and to better report on compound pages in general. Specifically,
+for compound pages, the exact (compound_pincount) pincount is reported.
 
 References
 ==========
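For orientation, a minimal usage sketch of the API described above (not part of this commit; pin_user_pages_fast() and unpin_user_pages() are the real kernel entry points, while the wrapper function and its name are hypothetical):

/*
 * Hypothetical driver helper: pin a user buffer for DMA via FOLL_PIN.
 * Whether the pin is tracked in compound_pincount (compound pages) or
 * via GUP_PIN_COUNTING_BIAS (order-0 pages) is hidden behind the API.
 */
#include <linux/mm.h>

static int example_pin_user_buffer(unsigned long uaddr, int nr_pages,
                                   struct page **pages)
{
        int pinned;

        pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE, pages);
        if (pinned < 0)
                return pinned;
        /* A real caller would also handle a short pin (pinned < nr_pages). */

        /* ... program the device to DMA into the pinned pages ... */

        unpin_user_pages(pages, pinned);        /* drop the pins taken above */
        return 0;
}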

include/linux/mm.h

@@ -887,17 +887,6 @@ static inline void destroy_compound_page(struct page *page)
 	compound_page_dtors[page[1].compound_dtor](page);
 }
 
-static inline bool hpage_pincount_available(struct page *page)
-{
-	/*
-	 * Can the page->hpage_pinned_refcount field be used? That field is in
-	 * the 3rd page of the compound page, so the smallest (2-page) compound
-	 * pages cannot support it.
-	 */
-	page = compound_head(page);
-	return PageCompound(page) && compound_order(page) > 1;
-}
-
 static inline int head_compound_pincount(struct page *head)
 {
 	return atomic_read(compound_pincount_ptr(head));
@@ -905,7 +894,7 @@ static inline int head_compound_pincount(struct page *head)
 
 static inline int compound_pincount(struct page *page)
 {
-	VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
+	VM_BUG_ON_PAGE(!PageCompound(page), page);
 	page = compound_head(page);
 	return head_compound_pincount(page);
 }
@@ -913,7 +902,9 @@ static inline int compound_pincount(struct page *page)
 static inline void set_compound_order(struct page *page, unsigned int order)
 {
 	page[1].compound_order = order;
+#ifdef CONFIG_64BIT
 	page[1].compound_nr = 1U << order;
+#endif
 }
 
 /* Returns the number of pages in this potentially compound page. */
@@ -921,7 +912,11 @@ static inline unsigned long compound_nr(struct page *page)
 {
 	if (!PageHead(page))
 		return 1;
+#ifdef CONFIG_64BIT
 	return page[1].compound_nr;
+#else
+	return 1UL << compound_order(page);
+#endif
 }
 
 /* Returns the number of bytes in this potentially compound page. */
@@ -1269,7 +1264,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages);
  */
static inline bool page_maybe_dma_pinned(struct page *page)
{
-	if (hpage_pincount_available(page))
+	if (PageCompound(page))
 		return compound_pincount(page) > 0;
 
 	/*
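The compound_nr() change above drops the cached count on 32-bit and recomputes it from compound_order. A stand-alone sketch (illustrative user-space C, not kernel code) of the invariant that makes this safe:

#include <assert.h>
#include <stdio.h>

int main(void)
{
        unsigned char compound_order = 9;       /* e.g. a 2MB THP on x86-64 */
        unsigned int compound_nr;

        /* what set_compound_order() caches on 64-bit */
        compound_nr = 1U << compound_order;

        /* what compound_nr() recomputes on 32-bit; the two forms always
         * agree, so dropping the cache costs only a shift */
        assert(compound_nr == 1UL << compound_order);
        printf("order %u => %u pages\n", compound_order, compound_nr);
        return 0;
}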

include/linux/mm_types.h

@@ -135,11 +135,14 @@ struct page {
 			unsigned char compound_dtor;
 			unsigned char compound_order;
 			atomic_t compound_mapcount;
+			atomic_t compound_pincount;
+#ifdef CONFIG_64BIT
 			unsigned int compound_nr; /* 1 << compound_order */
+#endif
 		};
 		struct {	/* Second tail page of compound page */
 			unsigned long _compound_pad_1;	/* compound_head */
-			atomic_t hpage_pinned_refcount;
+			unsigned long _compound_pad_2;
 			/* For both global and memcg */
 			struct list_head deferred_list;
 		};
@@ -300,7 +303,7 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page)
 
 static inline atomic_t *compound_pincount_ptr(struct page *page)
 {
-	return &page[2].hpage_pinned_refcount;
+	return &page[1].compound_pincount;
 }
 
 /*
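To make the collision mentioned in the commit message concrete, here is a hypothetical stand-alone model with 4-byte words standing in for a 32-bit unsigned long (the real layout is the union in struct page above): with compound_pincount in place, a cached compound_nr would occupy the fifth word of the first-tail-page arm, and the fifth word of the overlapping page-cache arm is page->private.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t word_t;        /* stand-in for a 32-bit unsigned long */

union fake_page {
        struct {                        /* page cache / anonymous arm */
                word_t lru_next, lru_prev;
                word_t mapping;
                word_t index;
                word_t private;         /* word 4: used by swap cache */
        };
        struct {                        /* first tail page arm */
                word_t compound_head;
                unsigned char compound_dtor, compound_order;
                int32_t compound_mapcount;
                int32_t compound_pincount;
                uint32_t compound_nr;   /* would also land on word 4 */
        };
};

int main(void)
{
        /* both print 16: compound_nr would alias page->private */
        printf("private     at byte %zu\n", offsetof(union fake_page, private));
        printf("compound_nr at byte %zu\n", offsetof(union fake_page, compound_nr));
        return 0;
}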

mm/debug.c

@@ -92,16 +92,10 @@ static void __dump_page(struct page *page)
 			page, page_ref_count(head), mapcount, mapping,
 			page_to_pgoff(page), page_to_pfn(page));
 	if (compound) {
-		if (hpage_pincount_available(page)) {
-			pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n",
-					head, compound_order(head),
-					head_compound_mapcount(head),
-					head_compound_pincount(head));
-		} else {
-			pr_warn("head:%p order:%u compound_mapcount:%d\n",
-					head, compound_order(head),
-					head_compound_mapcount(head));
-		}
+		pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n",
+				head, compound_order(head),
+				head_compound_mapcount(head),
+				head_compound_pincount(head));
 	}
 
 #ifdef CONFIG_MEMCG

mm/gup.c

@@ -99,12 +99,11 @@ retry:
  *
  *    FOLL_GET: page's refcount will be incremented by @refs.
  *
- *    FOLL_PIN on compound pages that are > two pages long: page's refcount will
- *    be incremented by @refs, and page[2].hpage_pinned_refcount will be
- *    incremented by @refs * GUP_PIN_COUNTING_BIAS.
+ *    FOLL_PIN on compound pages: page's refcount will be incremented by
+ *    @refs, and page[1].compound_pincount will be incremented by @refs.
  *
- *    FOLL_PIN on normal pages, or compound pages that are two pages long:
- *    page's refcount will be incremented by @refs * GUP_PIN_COUNTING_BIAS.
+ *    FOLL_PIN on normal pages: page's refcount will be incremented by
+ *    @refs * GUP_PIN_COUNTING_BIAS.
  *
  * Return: head page (with refcount appropriately incremented) for success, or
  * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's
@@ -135,16 +134,15 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
 			return NULL;
 
 		/*
-		 * When pinning a compound page of order > 1 (which is
-		 * what hpage_pincount_available() checks for), use an
-		 * exact count to track it.
+		 * When pinning a compound page, use an exact count to
+		 * track it.
 		 *
 		 * However, be sure to *also* increment the normal page
 		 * refcount field at least once, so that the page really
 		 * is pinned. That's why the refcount from the earlier
 		 * try_get_compound_head() is left intact.
 		 */
-		if (hpage_pincount_available(page))
+		if (PageHead(page))
 			atomic_add(refs, compound_pincount_ptr(page));
 		else
 			page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));
@@ -166,7 +164,7 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags)
 	if (flags & FOLL_PIN) {
 		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED,
 				    refs);
-		if (hpage_pincount_available(page))
+		if (PageHead(page))
 			atomic_sub(refs, compound_pincount_ptr(page));
 		else
 			refs *= GUP_PIN_COUNTING_BIAS;
@@ -211,7 +209,7 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags)
 		 * increment the normal page refcount field at least once,
 		 * so that the page really is pinned.
 		 */
-		if (hpage_pincount_available(page)) {
+		if (PageHead(page)) {
 			page_ref_add(page, 1);
 			atomic_add(1, compound_pincount_ptr(page));
 		} else {
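The two accounting schemes in the comment above can be modelled in a few lines of user-space C (a sketch assuming GUP_PIN_COUNTING_BIAS is 1024, its value in include/linux/mm.h; the model_* names are hypothetical):

#include <assert.h>

#define GUP_PIN_COUNTING_BIAS 1024

struct model_page {
        int refcount;
        int pincount;   /* compound_pincount; compound pages only */
        int compound;
};

static void model_pin(struct model_page *p, int refs)
{
        if (p->compound) {
                /* exact scheme: refcount += refs, pincount += refs */
                p->refcount += refs;
                p->pincount += refs;
        } else {
                /* bias scheme: refcount += refs * GUP_PIN_COUNTING_BIAS */
                p->refcount += refs * GUP_PIN_COUNTING_BIAS;
        }
}

static int model_maybe_dma_pinned(const struct model_page *p)
{
        if (p->compound)
                return p->pincount > 0;
        /* false positives possible once refcount reaches the bias */
        return p->refcount >= GUP_PIN_COUNTING_BIAS;
}

int main(void)
{
        struct model_page head = { .refcount = 1, .compound = 1 };
        struct model_page small = { .refcount = 1 };

        model_pin(&head, 3);
        model_pin(&small, 1);
        assert(model_maybe_dma_pinned(&head) && model_maybe_dma_pinned(&small));
        return 0;
}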

mm/hugetlb.c

@@ -1320,7 +1320,9 @@ static void __destroy_compound_gigantic_page(struct page *page,
 	}
 
 	set_compound_order(page, 0);
+#ifdef CONFIG_64BIT
 	page[1].compound_nr = 0;
+#endif
 	__ClearPageHead(page);
 }
 
@@ -1812,7 +1814,9 @@ out_error:
 		for (; j < nr_pages; j++, p = mem_map_next(p, page, j))
 			__ClearPageReserved(p);
 		set_compound_order(page, 0);
+#ifdef CONFIG_64BIT
 		page[1].compound_nr = 0;
+#endif
 		__ClearPageHead(page);
 		return false;
 	}

mm/page_alloc.c

@@ -734,8 +734,7 @@ static void prep_compound_head(struct page *page, unsigned int order)
 	set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
 	set_compound_order(page, order);
 	atomic_set(compound_mapcount_ptr(page), -1);
-	if (hpage_pincount_available(page))
-		atomic_set(compound_pincount_ptr(page), 0);
+	atomic_set(compound_pincount_ptr(page), 0);
 }
 
 static void prep_compound_tail(struct page *head, int tail_idx)
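One detail worth noting in prep_compound_head() above: compound_mapcount uses the same -1 bias as _mapcount (so -1 encodes "no mappings"), while compound_pincount is an exact count starting at 0. A tiny sketch of the two conventions (illustrative, not kernel code):

#include <assert.h>

int main(void)
{
        int compound_mapcount = -1;     /* biased: -1 encodes zero mappings */
        int compound_pincount = 0;      /* exact: 0 encodes "not pinned" */

        /* a page_mapcount()-style decode adds 1 to the stored value */
        assert(compound_mapcount + 1 == 0);
        /* page_maybe_dma_pinned() on a compound page tests pincount > 0 */
        assert(!(compound_pincount > 0));
        return 0;
}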

mm/rmap.c

@@ -1219,8 +1219,7 @@ void page_add_new_anon_rmap(struct page *page,
 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 		/* increment count (starts at -1) */
 		atomic_set(compound_mapcount_ptr(page), 0);
-		if (hpage_pincount_available(page))
-			atomic_set(compound_pincount_ptr(page), 0);
+		atomic_set(compound_pincount_ptr(page), 0);
 
 		__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
 	} else {
@@ -2353,8 +2352,7 @@ void hugepage_add_new_anon_rmap(struct page *page,
 			struct vm_area_struct *vma, unsigned long address)
 {
 	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	atomic_set(compound_mapcount_ptr(page), 0);
-	if (hpage_pincount_available(page))
-		atomic_set(compound_pincount_ptr(page), 0);
+	atomic_set(compound_pincount_ptr(page), 0);
 	__page_set_anon_rmap(page, vma, address, 1);
 }