Merge branch 'for-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu
Pull percpu updates from Dennis Zhou:

 - percpu chunk depopulation - depopulate backing pages for chunks with
   empty pages when we exceed a global threshold without those pages.
   This lets us reclaim a portion of memory that would previously be
   lost until the full chunk would be freed (possibly never).

 - memcg accounting cleanup - previously separate chunks were managed
   for normal allocations and __GFP_ACCOUNT allocations. These are now
   consolidated which cleans up the code quite a bit.

 - a few misc clean ups for clang warnings

* 'for-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu:
  percpu: optimize locking in pcpu_balance_workfn()
  percpu: initialize best_upa variable
  percpu: rework memcg accounting
  mm, memcg: introduce mem_cgroup_kmem_disabled()
  mm, memcg: mark cgroup_memory_nosocket, nokmem and noswap as __ro_after_init
  percpu: make symbol 'pcpu_free_slot' static
  percpu: implement partial chunk depopulation
  percpu: use pcpu_free_slot instead of pcpu_nr_slots - 1
  percpu: factor out pcpu_check_block_hint()
  percpu: split __pcpu_balance_workfn()
  percpu: fix a comment about the chunks ordering
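To make the first bullet concrete, here is a minimal user-space sketch of the depopulation decision. The field names and the "quarter of the chunk empty, plus a global surplus" heuristic mirror pcpu_should_reclaim_chunk() in the diff below, but the struct, the threshold constant, and the test harness are illustrative only, not the kernel implementation.

```c
/* Illustrative model of the percpu depopulation trigger (not kernel code). */
#include <stdbool.h>
#include <stdio.h>

#define EMPTY_POP_PAGES_HIGH 4   /* assumed global surplus target for the example */

struct chunk {
    int nr_pages;               /* pages backing this chunk */
    int nr_empty_pop_pages;     /* populated but currently unused pages */
    bool isolated;              /* already pulled out of the active slots */
};

/*
 * A chunk is worth depopulating when a quarter of its pages sit empty and the
 * rest of the system still keeps enough empty populated pages around.
 */
static bool should_reclaim(const struct chunk *c, int global_empty_pop)
{
    if (c->isolated)
        return c->nr_empty_pop_pages > 0;

    return global_empty_pop >
               EMPTY_POP_PAGES_HIGH + c->nr_empty_pop_pages &&
           c->nr_empty_pop_pages >= c->nr_pages / 4;
}

int main(void)
{
    struct chunk busy = { .nr_pages = 64, .nr_empty_pop_pages = 2 };
    struct chunk idle = { .nr_pages = 64, .nr_empty_pop_pages = 40 };

    printf("busy chunk reclaimed: %d\n", should_reclaim(&busy, 50));
    printf("idle chunk reclaimed: %d\n", should_reclaim(&idle, 50));
    return 0;
}
```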
This commit is contained in: e267992f9e
@@ -1619,6 +1619,7 @@ static inline void set_shrinker_bit(struct mem_cgroup *memcg,
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
+bool mem_cgroup_kmem_disabled(void);
 int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
 void __memcg_kmem_uncharge_page(struct page *page, int order);
 
@@ -1672,6 +1673,10 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 struct mem_cgroup *mem_cgroup_from_obj(void *p);
 
 #else
+static inline bool mem_cgroup_kmem_disabled(void)
+{
+    return true;
+}
 
 static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
                                          int order)
@@ -81,14 +81,14 @@ DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
 
 /* Socket memory accounting disabled? */
-static bool cgroup_memory_nosocket;
+static bool cgroup_memory_nosocket __ro_after_init;
 
 /* Kernel memory accounting disabled? */
-bool cgroup_memory_nokmem;
+bool cgroup_memory_nokmem __ro_after_init;
 
 /* Whether the swap controller is active */
 #ifdef CONFIG_MEMCG_SWAP
-bool cgroup_memory_noswap __read_mostly;
+bool cgroup_memory_noswap __ro_after_init;
 #else
 #define cgroup_memory_noswap 1
 #endif
@@ -256,6 +256,11 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
 #ifdef CONFIG_MEMCG_KMEM
 extern spinlock_t css_set_lock;
 
+bool mem_cgroup_kmem_disabled(void)
+{
+    return cgroup_memory_nokmem;
+}
+
 static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
                                       unsigned int nr_pages);
 
@@ -5,25 +5,6 @@
 #include <linux/types.h>
 #include <linux/percpu.h>
 
-/*
- * There are two chunk types: root and memcg-aware.
- * Chunks of each type have separate slots list.
- *
- * Memcg-aware chunks have an attached vector of obj_cgroup pointers, which is
- * used to store memcg membership data of a percpu object. Obj_cgroups are
- * ref-counted pointers to a memory cgroup with an ability to switch dynamically
- * to the parent memory cgroup. This allows to reclaim a deleted memory cgroup
- * without reclaiming of all outstanding objects, which hold a reference at it.
- */
-enum pcpu_chunk_type {
-    PCPU_CHUNK_ROOT,
-#ifdef CONFIG_MEMCG_KMEM
-    PCPU_CHUNK_MEMCG,
-#endif
-    PCPU_NR_CHUNK_TYPES,
-    PCPU_FAIL_ALLOC = PCPU_NR_CHUNK_TYPES
-};
-
 /*
  * pcpu_block_md is the metadata block struct.
  * Each chunk's bitmap is split into a number of full blocks.
@@ -67,6 +48,8 @@ struct pcpu_chunk {
 
     void *data;             /* chunk data */
     bool immutable;         /* no [de]population allowed */
+    bool isolated;          /* isolated from active chunk
+                               slots */
     int start_offset;       /* the overlap with the previous
                                region to have a page aligned
                                base_addr */
@@ -87,7 +70,9 @@ extern spinlock_t pcpu_lock;
 
 extern struct list_head *pcpu_chunk_lists;
 extern int pcpu_nr_slots;
-extern int pcpu_nr_empty_pop_pages[];
+extern int pcpu_sidelined_slot;
+extern int pcpu_to_depopulate_slot;
+extern int pcpu_nr_empty_pop_pages;
 
 extern struct pcpu_chunk *pcpu_first_chunk;
 extern struct pcpu_chunk *pcpu_reserved_chunk;
@@ -128,37 +113,6 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk)
     return pcpu_nr_pages_to_map_bits(chunk->nr_pages);
 }
 
-#ifdef CONFIG_MEMCG_KMEM
-static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
-{
-    if (chunk->obj_cgroups)
-        return PCPU_CHUNK_MEMCG;
-    return PCPU_CHUNK_ROOT;
-}
-
-static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
-{
-    return chunk_type == PCPU_CHUNK_MEMCG;
-}
-
-#else
-static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
-{
-    return PCPU_CHUNK_ROOT;
-}
-
-static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
-{
-    return false;
-}
-#endif
-
-static inline struct list_head *pcpu_chunk_list(enum pcpu_chunk_type chunk_type)
-{
-    return &pcpu_chunk_lists[pcpu_nr_slots *
-                             pcpu_is_memcg_chunk(chunk_type)];
-}
-
 #ifdef CONFIG_PERCPU_STATS
 
 #include <linux/spinlock.h>
@@ -44,8 +44,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
     /* nada */
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
-                                            gfp_t gfp)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
     const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
     struct pcpu_chunk *chunk;
@@ -53,7 +52,7 @@ static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
     unsigned long flags;
     int i;
 
-    chunk = pcpu_alloc_chunk(type, gfp);
+    chunk = pcpu_alloc_chunk(gfp);
     if (!chunk)
         return NULL;
 
@@ -118,3 +117,8 @@ static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
 
     return 0;
 }
+
+static bool pcpu_should_reclaim_chunk(struct pcpu_chunk *chunk)
+{
+    return false;
+}
@@ -34,15 +34,11 @@ static int find_max_nr_alloc(void)
 {
     struct pcpu_chunk *chunk;
     int slot, max_nr_alloc;
-    enum pcpu_chunk_type type;
 
     max_nr_alloc = 0;
-    for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
-        for (slot = 0; slot < pcpu_nr_slots; slot++)
-            list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot],
-                                list)
-                max_nr_alloc = max(max_nr_alloc,
-                                   chunk->nr_alloc);
+    for (slot = 0; slot < pcpu_nr_slots; slot++)
+        list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list)
+            max_nr_alloc = max(max_nr_alloc, chunk->nr_alloc);
 
     return max_nr_alloc;
 }
@@ -133,9 +129,6 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
     P("cur_min_alloc", cur_min_alloc);
     P("cur_med_alloc", cur_med_alloc);
     P("cur_max_alloc", cur_max_alloc);
-#ifdef CONFIG_MEMCG_KMEM
-    P("memcg_aware", pcpu_is_memcg_chunk(pcpu_chunk_type(chunk)));
-#endif
     seq_putc(m, '\n');
 }
 
@@ -144,8 +137,6 @@ static int percpu_stats_show(struct seq_file *m, void *v)
     struct pcpu_chunk *chunk;
     int slot, max_nr_alloc;
     int *buffer;
-    enum pcpu_chunk_type type;
-    int nr_empty_pop_pages;
 
 alloc_buffer:
     spin_lock_irq(&pcpu_lock);
@@ -166,10 +157,6 @@ alloc_buffer:
         goto alloc_buffer;
     }
 
-    nr_empty_pop_pages = 0;
-    for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
-        nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type];
-
 #define PL(X) \
     seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)
 
@@ -201,7 +188,7 @@ alloc_buffer:
     PU(nr_max_chunks);
     PU(min_alloc_size);
     PU(max_alloc_size);
-    P("empty_pop_pages", nr_empty_pop_pages);
+    P("empty_pop_pages", pcpu_nr_empty_pop_pages);
     seq_putc(m, '\n');
 
 #undef PU
@@ -215,18 +202,17 @@ alloc_buffer:
         chunk_map_stats(m, pcpu_reserved_chunk, buffer);
     }
 
-    for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) {
-        for (slot = 0; slot < pcpu_nr_slots; slot++) {
-            list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot],
-                                list) {
-                if (chunk == pcpu_first_chunk) {
-                    seq_puts(m, "Chunk: <- First Chunk\n");
-                    chunk_map_stats(m, chunk, buffer);
-                } else {
-                    seq_puts(m, "Chunk:\n");
-                    chunk_map_stats(m, chunk, buffer);
-                }
-            }
-        }
-    }
+    for (slot = 0; slot < pcpu_nr_slots; slot++) {
+        list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) {
+            if (chunk == pcpu_first_chunk)
+                seq_puts(m, "Chunk: <- First Chunk\n");
+            else if (slot == pcpu_to_depopulate_slot)
+                seq_puts(m, "Chunk (to_depopulate)\n");
+            else if (slot == pcpu_sidelined_slot)
+                seq_puts(m, "Chunk (sidelined):\n");
+            else
+                seq_puts(m, "Chunk:\n");
+            chunk_map_stats(m, chunk, buffer);
+        }
+    }
 
@@ -329,13 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
     pcpu_free_pages(chunk, pages, page_start, page_end);
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
-                                            gfp_t gfp)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
     struct pcpu_chunk *chunk;
     struct vm_struct **vms;
 
-    chunk = pcpu_alloc_chunk(type, gfp);
+    chunk = pcpu_alloc_chunk(gfp);
     if (!chunk)
         return NULL;
 
@@ -378,3 +377,33 @@ static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
     /* no extra restriction */
     return 0;
 }
+
+/**
+ * pcpu_should_reclaim_chunk - determine if a chunk should go into reclaim
+ * @chunk: chunk of interest
+ *
+ * This is the entry point for percpu reclaim. If a chunk qualifies, it is then
+ * isolated and managed in separate lists at the back of pcpu_slot: sidelined
+ * and to_depopulate respectively. The to_depopulate list holds chunks slated
+ * for depopulation. They no longer contribute to pcpu_nr_empty_pop_pages once
+ * they are on this list. Once depopulated, they are moved onto the sidelined
+ * list which enables them to be pulled back in for allocation if no other chunk
+ * can suffice the allocation.
+ */
+static bool pcpu_should_reclaim_chunk(struct pcpu_chunk *chunk)
+{
+    /* do not reclaim either the first chunk or reserved chunk */
+    if (chunk == pcpu_first_chunk || chunk == pcpu_reserved_chunk)
+        return false;
+
+    /*
+     * If it is isolated, it may be on the sidelined list so move it back to
+     * the to_depopulate list. If we hit at least 1/4 pages empty pages AND
+     * there is no system-wide shortage of empty pages aside from this
+     * chunk, move it to the to_depopulate list.
+     */
+    return ((chunk->isolated && chunk->nr_empty_pop_pages) ||
+            (pcpu_nr_empty_pop_pages >
+             (PCPU_EMPTY_POP_PAGES_HIGH + chunk->nr_empty_pop_pages) &&
+             chunk->nr_empty_pop_pages >= chunk->nr_pages / 4));
+}
mm/percpu.c (368 lines changed):
@@ -99,7 +99,10 @@
 
 #include "percpu-internal.h"
 
-/* the slots are sorted by free bytes left, 1-31 bytes share the same slot */
+/*
+ * The slots are sorted by the size of the biggest continuous free area.
+ * 1-31 bytes share the same slot.
+ */
 #define PCPU_SLOT_BASE_SHIFT 5
 /* chunks in slots below this are subject to being sidelined on failed alloc */
 #define PCPU_SLOT_FAIL_THRESHOLD 3
@@ -132,6 +135,9 @@ static int pcpu_unit_size __ro_after_init;
 static int pcpu_nr_units __ro_after_init;
 static int pcpu_atom_size __ro_after_init;
 int pcpu_nr_slots __ro_after_init;
+static int pcpu_free_slot __ro_after_init;
+int pcpu_sidelined_slot __ro_after_init;
+int pcpu_to_depopulate_slot __ro_after_init;
 static size_t pcpu_chunk_struct_size __ro_after_init;
 
 /* cpus with the lowest and highest unit addresses */
@@ -173,10 +179,10 @@ struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */
 static LIST_HEAD(pcpu_map_extend_chunks);
 
 /*
- * The number of empty populated pages by chunk type, protected by pcpu_lock.
+ * The number of empty populated pages, protected by pcpu_lock.
  * The reserved chunk doesn't contribute to the count.
  */
-int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
+int pcpu_nr_empty_pop_pages;
 
 /*
  * The number of populated pages in use by the allocator, protected by
@@ -234,7 +240,7 @@ static int __pcpu_size_to_slot(int size)
 static int pcpu_size_to_slot(int size)
 {
     if (size == pcpu_unit_size)
-        return pcpu_nr_slots - 1;
+        return pcpu_free_slot;
     return __pcpu_size_to_slot(size);
 }
 
@@ -303,6 +309,25 @@ static unsigned long pcpu_block_off_to_off(int index, int off)
     return index * PCPU_BITMAP_BLOCK_BITS + off;
 }
 
+/**
+ * pcpu_check_block_hint - check against the contig hint
+ * @block: block of interest
+ * @bits: size of allocation
+ * @align: alignment of area (max PAGE_SIZE)
+ *
+ * Check to see if the allocation can fit in the block's contig hint.
+ * Note, a chunk uses the same hints as a block so this can also check against
+ * the chunk's contig hint.
+ */
+static bool pcpu_check_block_hint(struct pcpu_block_md *block, int bits,
+                                  size_t align)
+{
+    int bit_off = ALIGN(block->contig_hint_start, align) -
+                  block->contig_hint_start;
+
+    return bit_off + bits <= block->contig_hint;
+}
+
 /*
  * pcpu_next_hint - determine which hint to use
  * @block: block of interest
@@ -507,13 +532,10 @@ static void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot,
                               bool move_front)
 {
     if (chunk != pcpu_reserved_chunk) {
-        struct list_head *pcpu_slot;
-
-        pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk));
         if (move_front)
-            list_move(&chunk->list, &pcpu_slot[slot]);
+            list_move(&chunk->list, &pcpu_chunk_lists[slot]);
         else
-            list_move_tail(&chunk->list, &pcpu_slot[slot]);
+            list_move_tail(&chunk->list, &pcpu_chunk_lists[slot]);
     }
 }
 
@@ -539,10 +561,36 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
 {
     int nslot = pcpu_chunk_slot(chunk);
 
+    /* leave isolated chunks in-place */
+    if (chunk->isolated)
+        return;
+
     if (oslot != nslot)
         __pcpu_chunk_move(chunk, nslot, oslot < nslot);
 }
 
+static void pcpu_isolate_chunk(struct pcpu_chunk *chunk)
+{
+    lockdep_assert_held(&pcpu_lock);
+
+    if (!chunk->isolated) {
+        chunk->isolated = true;
+        pcpu_nr_empty_pop_pages -= chunk->nr_empty_pop_pages;
+    }
+    list_move(&chunk->list, &pcpu_chunk_lists[pcpu_to_depopulate_slot]);
+}
+
+static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
+{
+    lockdep_assert_held(&pcpu_lock);
+
+    if (chunk->isolated) {
+        chunk->isolated = false;
+        pcpu_nr_empty_pop_pages += chunk->nr_empty_pop_pages;
+        pcpu_chunk_relocate(chunk, -1);
+    }
+}
+
 /*
  * pcpu_update_empty_pages - update empty page counters
  * @chunk: chunk of interest
@@ -555,8 +603,8 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
 static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr)
 {
     chunk->nr_empty_pop_pages += nr;
-    if (chunk != pcpu_reserved_chunk)
-        pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr;
+    if (chunk != pcpu_reserved_chunk && !chunk->isolated)
+        pcpu_nr_empty_pop_pages += nr;
 }
 
 /*
@@ -1063,14 +1111,11 @@ static int pcpu_find_block_fit(struct pcpu_chunk *chunk, int alloc_bits,
     int bit_off, bits, next_off;
 
     /*
-     * Check to see if the allocation can fit in the chunk's contig hint.
-     * This is an optimization to prevent scanning by assuming if it
-     * cannot fit in the global hint, there is memory pressure and creating
-     * a new chunk would happen soon.
+     * This is an optimization to prevent scanning by assuming if the
+     * allocation cannot fit in the global hint, there is memory pressure
+     * and creating a new chunk would happen soon.
      */
-    bit_off = ALIGN(chunk_md->contig_hint_start, align) -
-              chunk_md->contig_hint_start;
-    if (bit_off + alloc_bits > chunk_md->contig_hint)
+    if (!pcpu_check_block_hint(chunk_md, alloc_bits, align))
         return -1;
 
     bit_off = pcpu_next_hint(chunk_md, alloc_bits);
@@ -1352,7 +1397,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
                       alloc_size);
 
 #ifdef CONFIG_MEMCG_KMEM
-    /* first chunk isn't memcg-aware */
+    /* first chunk is free to use */
     chunk->obj_cgroups = NULL;
 #endif
     pcpu_init_md_blocks(chunk);
@@ -1394,7 +1439,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
     return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(enum pcpu_chunk_type type, gfp_t gfp)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
     struct pcpu_chunk *chunk;
     int region_bits;
@@ -1423,7 +1468,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(enum pcpu_chunk_type type, gfp_t gfp)
         goto md_blocks_fail;
 
 #ifdef CONFIG_MEMCG_KMEM
-    if (pcpu_is_memcg_chunk(type)) {
+    if (!mem_cgroup_kmem_disabled()) {
         chunk->obj_cgroups =
             pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) *
                             sizeof(struct obj_cgroup *), gfp);
@@ -1536,8 +1581,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
                                int page_start, int page_end, gfp_t gfp);
 static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
                                   int page_start, int page_end);
-static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
-                                            gfp_t gfp);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1580,25 +1624,25 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 }
 
 #ifdef CONFIG_MEMCG_KMEM
-static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
-                                                      struct obj_cgroup **objcgp)
+static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
+                                      struct obj_cgroup **objcgp)
 {
     struct obj_cgroup *objcg;
 
     if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT))
-        return PCPU_CHUNK_ROOT;
+        return true;
 
     objcg = get_obj_cgroup_from_current();
     if (!objcg)
-        return PCPU_CHUNK_ROOT;
+        return true;
 
     if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) {
         obj_cgroup_put(objcg);
-        return PCPU_FAIL_ALLOC;
+        return false;
     }
 
     *objcgp = objcg;
-    return PCPU_CHUNK_MEMCG;
+    return true;
 }
 
 static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
@@ -1608,7 +1652,7 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
     if (!objcg)
         return;
 
-    if (chunk) {
+    if (likely(chunk && chunk->obj_cgroups)) {
         chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;
 
         rcu_read_lock();
@@ -1625,10 +1669,12 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
 {
     struct obj_cgroup *objcg;
 
-    if (!pcpu_is_memcg_chunk(pcpu_chunk_type(chunk)))
+    if (unlikely(!chunk->obj_cgroups))
         return;
 
     objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
+    if (!objcg)
+        return;
     chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;
 
     obj_cgroup_uncharge(objcg, size * num_possible_cpus());
@@ -1642,10 +1688,10 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
 }
 
 #else /* CONFIG_MEMCG_KMEM */
-static enum pcpu_chunk_type
+static bool
 pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp)
 {
-    return PCPU_CHUNK_ROOT;
+    return true;
 }
 
 static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
@@ -1680,8 +1726,6 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
     gfp_t pcpu_gfp;
     bool is_atomic;
     bool do_warn;
-    enum pcpu_chunk_type type;
-    struct list_head *pcpu_slot;
     struct obj_cgroup *objcg = NULL;
     static int warn_limit = 10;
     struct pcpu_chunk *chunk, *next;
@@ -1717,10 +1761,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
         return NULL;
     }
 
-    type = pcpu_memcg_pre_alloc_hook(size, gfp, &objcg);
-    if (unlikely(type == PCPU_FAIL_ALLOC))
+    if (unlikely(!pcpu_memcg_pre_alloc_hook(size, gfp, &objcg)))
         return NULL;
-    pcpu_slot = pcpu_chunk_list(type);
 
     if (!is_atomic) {
         /*
@@ -1758,8 +1800,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 
 restart:
     /* search through normal chunks */
-    for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
-        list_for_each_entry_safe(chunk, next, &pcpu_slot[slot], list) {
+    for (slot = pcpu_size_to_slot(size); slot <= pcpu_free_slot; slot++) {
+        list_for_each_entry_safe(chunk, next, &pcpu_chunk_lists[slot],
+                                 list) {
             off = pcpu_find_block_fit(chunk, bits, bit_align,
                                       is_atomic);
             if (off < 0) {
@@ -1769,9 +1812,10 @@ restart:
             }
 
             off = pcpu_alloc_area(chunk, bits, bit_align, off);
-            if (off >= 0)
+            if (off >= 0) {
+                pcpu_reintegrate_chunk(chunk);
                 goto area_found;
+            }
         }
     }
 
@@ -1787,8 +1831,8 @@ restart:
         goto fail;
     }
 
-    if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-        chunk = pcpu_create_chunk(type, pcpu_gfp);
+    if (list_empty(&pcpu_chunk_lists[pcpu_free_slot])) {
+        chunk = pcpu_create_chunk(pcpu_gfp);
         if (!chunk) {
             err = "failed to allocate new chunk";
             goto fail;
@@ -1832,7 +1876,7 @@ area_found:
         mutex_unlock(&pcpu_alloc_mutex);
     }
 
-    if (pcpu_nr_empty_pop_pages[type] < PCPU_EMPTY_POP_PAGES_LOW)
+    if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
         pcpu_schedule_balance_work();
 
     /* clear the areas and return address relative to base address */
@@ -1930,33 +1974,28 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
 }
 
 /**
- * __pcpu_balance_workfn - manage the amount of free chunks and populated pages
- * @type: chunk type
+ * pcpu_balance_free - manage the amount of free chunks
+ * @empty_only: free chunks only if there are no populated pages
  *
- * Reclaim all fully free chunks except for the first one. This is also
- * responsible for maintaining the pool of empty populated pages. However,
- * it is possible that this is called when physical memory is scarce causing
- * OOM killer to be triggered. We should avoid doing so until an actual
- * allocation causes the failure as it is possible that requests can be
- * serviced from already backed regions.
+ * If empty_only is %false, reclaim all fully free chunks regardless of the
+ * number of populated pages. Otherwise, only reclaim chunks that have no
+ * populated pages.
+ *
+ * CONTEXT:
+ * pcpu_lock (can be dropped temporarily)
  */
-static void __pcpu_balance_workfn(enum pcpu_chunk_type type)
+static void pcpu_balance_free(bool empty_only)
 {
-    /* gfp flags passed to underlying allocators */
-    const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
     LIST_HEAD(to_free);
-    struct list_head *pcpu_slot = pcpu_chunk_list(type);
-    struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
+    struct list_head *free_head = &pcpu_chunk_lists[pcpu_free_slot];
     struct pcpu_chunk *chunk, *next;
-    int slot, nr_to_pop, ret;
+
+    lockdep_assert_held(&pcpu_lock);
 
     /*
      * There's no reason to keep around multiple unused chunks and VM
      * areas can be scarce. Destroy all free chunks except for one.
      */
-    mutex_lock(&pcpu_alloc_mutex);
-    spin_lock_irq(&pcpu_lock);
 
     list_for_each_entry_safe(chunk, next, free_head, list) {
         WARN_ON(chunk->immutable);
 
@@ -1964,11 +2003,14 @@ static void __pcpu_balance_workfn(enum pcpu_chunk_type type)
         if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
             continue;
 
-        list_move(&chunk->list, &to_free);
+        if (!empty_only || chunk->nr_empty_pop_pages == 0)
+            list_move(&chunk->list, &to_free);
     }
 
-    spin_unlock_irq(&pcpu_lock);
+    if (list_empty(&to_free))
+        return;
 
+    spin_unlock_irq(&pcpu_lock);
     list_for_each_entry_safe(chunk, next, &to_free, list) {
         unsigned int rs, re;
 
@@ -1982,6 +2024,29 @@ static void __pcpu_balance_workfn(enum pcpu_chunk_type type)
         pcpu_destroy_chunk(chunk);
         cond_resched();
     }
+    spin_lock_irq(&pcpu_lock);
+}
+
+/**
+ * pcpu_balance_populated - manage the amount of populated pages
+ *
+ * Maintain a certain amount of populated pages to satisfy atomic allocations.
+ * It is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered. We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
+ *
+ * CONTEXT:
+ * pcpu_lock (can be dropped temporarily)
+ */
+static void pcpu_balance_populated(void)
+{
+    /* gfp flags passed to underlying allocators */
+    const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+    struct pcpu_chunk *chunk;
+    int slot, nr_to_pop, ret;
+
+    lockdep_assert_held(&pcpu_lock);
 
     /*
      * Ensure there are certain number of free populated pages for
@@ -2000,23 +2065,21 @@ retry_pop:
         pcpu_atomic_alloc_failed = false;
     } else {
         nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
-                          pcpu_nr_empty_pop_pages[type],
+                          pcpu_nr_empty_pop_pages,
                           0, PCPU_EMPTY_POP_PAGES_HIGH);
     }
 
-    for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
+    for (slot = pcpu_size_to_slot(PAGE_SIZE); slot <= pcpu_free_slot; slot++) {
         unsigned int nr_unpop = 0, rs, re;
 
         if (!nr_to_pop)
             break;
 
-        spin_lock_irq(&pcpu_lock);
-        list_for_each_entry(chunk, &pcpu_slot[slot], list) {
+        list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) {
             nr_unpop = chunk->nr_pages - chunk->nr_populated;
             if (nr_unpop)
                 break;
         }
-        spin_unlock_irq(&pcpu_lock);
 
         if (!nr_unpop)
             continue;
@@ -2026,12 +2089,13 @@ retry_pop:
                                      chunk->nr_pages) {
             int nr = min_t(int, re - rs, nr_to_pop);
 
+            spin_unlock_irq(&pcpu_lock);
             ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
+            cond_resched();
+            spin_lock_irq(&pcpu_lock);
             if (!ret) {
                 nr_to_pop -= nr;
-                spin_lock_irq(&pcpu_lock);
                 pcpu_chunk_populated(chunk, rs, rs + nr);
-                spin_unlock_irq(&pcpu_lock);
             } else {
                 nr_to_pop = 0;
             }
@@ -2043,30 +2107,133 @@ retry_pop:
 
     if (nr_to_pop) {
         /* ran out of chunks to populate, create a new one and retry */
-        chunk = pcpu_create_chunk(type, gfp);
+        spin_unlock_irq(&pcpu_lock);
+        chunk = pcpu_create_chunk(gfp);
+        cond_resched();
+        spin_lock_irq(&pcpu_lock);
         if (chunk) {
-            spin_lock_irq(&pcpu_lock);
             pcpu_chunk_relocate(chunk, -1);
-            spin_unlock_irq(&pcpu_lock);
             goto retry_pop;
         }
     }
+}
 
-    mutex_unlock(&pcpu_alloc_mutex);
+/**
+ * pcpu_reclaim_populated - scan over to_depopulate chunks and free empty pages
+ *
+ * Scan over chunks in the depopulate list and try to release unused populated
+ * pages back to the system. Depopulated chunks are sidelined to prevent
+ * repopulating these pages unless required. Fully free chunks are reintegrated
+ * and freed accordingly (1 is kept around). If we drop below the empty
+ * populated pages threshold, reintegrate the chunk if it has empty free pages.
+ * Each chunk is scanned in the reverse order to keep populated pages close to
+ * the beginning of the chunk.
+ *
+ * CONTEXT:
+ * pcpu_lock (can be dropped temporarily)
+ *
+ */
+static void pcpu_reclaim_populated(void)
+{
+    struct pcpu_chunk *chunk;
+    struct pcpu_block_md *block;
+    int i, end;
+
+    lockdep_assert_held(&pcpu_lock);
+
+restart:
+    /*
+     * Once a chunk is isolated to the to_depopulate list, the chunk is no
+     * longer discoverable to allocations whom may populate pages. The only
+     * other accessor is the free path which only returns area back to the
+     * allocator not touching the populated bitmap.
+     */
+    while (!list_empty(&pcpu_chunk_lists[pcpu_to_depopulate_slot])) {
+        chunk = list_first_entry(&pcpu_chunk_lists[pcpu_to_depopulate_slot],
+                                 struct pcpu_chunk, list);
+        WARN_ON(chunk->immutable);
+
+        /*
+         * Scan chunk's pages in the reverse order to keep populated
+         * pages close to the beginning of the chunk.
+         */
+        for (i = chunk->nr_pages - 1, end = -1; i >= 0; i--) {
+            /* no more work to do */
+            if (chunk->nr_empty_pop_pages == 0)
+                break;
+
+            /* reintegrate chunk to prevent atomic alloc failures */
+            if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_HIGH) {
+                pcpu_reintegrate_chunk(chunk);
+                goto restart;
+            }
+
+            /*
+             * If the page is empty and populated, start or
+             * extend the (i, end) range. If i == 0, decrease
+             * i and perform the depopulation to cover the last
+             * (first) page in the chunk.
+             */
+            block = chunk->md_blocks + i;
+            if (block->contig_hint == PCPU_BITMAP_BLOCK_BITS &&
+                test_bit(i, chunk->populated)) {
+                if (end == -1)
+                    end = i;
+                if (i > 0)
+                    continue;
+                i--;
+            }
+
+            /* depopulate if there is an active range */
+            if (end == -1)
+                continue;
+
+            spin_unlock_irq(&pcpu_lock);
+            pcpu_depopulate_chunk(chunk, i + 1, end + 1);
+            cond_resched();
+            spin_lock_irq(&pcpu_lock);
+
+            pcpu_chunk_depopulated(chunk, i + 1, end + 1);
+
+            /* reset the range and continue */
+            end = -1;
+        }
+
+        if (chunk->free_bytes == pcpu_unit_size)
+            pcpu_reintegrate_chunk(chunk);
+        else
+            list_move(&chunk->list,
+                      &pcpu_chunk_lists[pcpu_sidelined_slot]);
+    }
 }
 
 /**
  * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
- * Call __pcpu_balance_workfn() for each chunk type.
+ * For each chunk type, manage the number of fully free chunks and the number of
+ * populated pages. An important thing to consider is when pages are freed and
+ * how they contribute to the global counts.
  */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
-    enum pcpu_chunk_type type;
+    /*
+     * pcpu_balance_free() is called twice because the first time we may
+     * trim pages in the active pcpu_nr_empty_pop_pages which may cause us
+     * to grow other chunks. This then gives pcpu_reclaim_populated() time
+     * to move fully free chunks to the active list to be freed if
+     * appropriate.
+     */
+    mutex_lock(&pcpu_alloc_mutex);
+    spin_lock_irq(&pcpu_lock);
 
-    for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
-        __pcpu_balance_workfn(type);
+    pcpu_balance_free(false);
+    pcpu_reclaim_populated();
+    pcpu_balance_populated();
+    pcpu_balance_free(true);
+
+    spin_unlock_irq(&pcpu_lock);
+    mutex_unlock(&pcpu_alloc_mutex);
 }
 
 /**
@@ -2085,7 +2252,6 @@ void free_percpu(void __percpu *ptr)
     unsigned long flags;
     int size, off;
     bool need_balance = false;
-    struct list_head *pcpu_slot;
 
     if (!ptr)
         return;
@@ -2101,19 +2267,24 @@ void free_percpu(void __percpu *ptr)
 
     size = pcpu_free_area(chunk, off);
 
-    pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk));
-
     pcpu_memcg_free_hook(chunk, off, size);
 
-    /* if there are more than one fully free chunks, wake up grim reaper */
-    if (chunk->free_bytes == pcpu_unit_size) {
+    /*
+     * If there are more than one fully free chunks, wake up grim reaper.
+     * If the chunk is isolated, it may be in the process of being
+     * reclaimed. Let reclaim manage cleaning up of that chunk.
+     */
+    if (!chunk->isolated && chunk->free_bytes == pcpu_unit_size) {
         struct pcpu_chunk *pos;
 
-        list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
+        list_for_each_entry(pos, &pcpu_chunk_lists[pcpu_free_slot], list)
             if (pos != chunk) {
                 need_balance = true;
                 break;
             }
+    } else if (pcpu_should_reclaim_chunk(chunk)) {
+        pcpu_isolate_chunk(chunk);
+        need_balance = true;
     }
 
     trace_percpu_free_percpu(chunk->base_addr, off, ptr);
@@ -2414,7 +2585,6 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
     int map_size;
     unsigned long tmp_addr;
     size_t alloc_size;
-    enum pcpu_chunk_type type;
 
 #define PCPU_SETUP_BUG_ON(cond) do { \
     if (unlikely(cond)) { \
@@ -2528,22 +2698,24 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
     pcpu_stats_save_ai(ai);
 
     /*
-     * Allocate chunk slots. The additional last slot is for
-     * empty chunks.
+     * Allocate chunk slots. The slots after the active slots are:
+     *   sidelined_slot - isolated, depopulated chunks
+     *   free_slot - fully free chunks
+     *   to_depopulate_slot - isolated, chunks to depopulate
      */
-    pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
+    pcpu_sidelined_slot = __pcpu_size_to_slot(pcpu_unit_size) + 1;
+    pcpu_free_slot = pcpu_sidelined_slot + 1;
+    pcpu_to_depopulate_slot = pcpu_free_slot + 1;
+    pcpu_nr_slots = pcpu_to_depopulate_slot + 1;
     pcpu_chunk_lists = memblock_alloc(pcpu_nr_slots *
-                                      sizeof(pcpu_chunk_lists[0]) *
-                                      PCPU_NR_CHUNK_TYPES,
+                                      sizeof(pcpu_chunk_lists[0]),
                                       SMP_CACHE_BYTES);
     if (!pcpu_chunk_lists)
         panic("%s: Failed to allocate %zu bytes\n", __func__,
-              pcpu_nr_slots * sizeof(pcpu_chunk_lists[0]) *
-              PCPU_NR_CHUNK_TYPES);
+              pcpu_nr_slots * sizeof(pcpu_chunk_lists[0]));
 
-    for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
-        for (i = 0; i < pcpu_nr_slots; i++)
-            INIT_LIST_HEAD(&pcpu_chunk_list(type)[i]);
+    for (i = 0; i < pcpu_nr_slots; i++)
+        INIT_LIST_HEAD(&pcpu_chunk_lists[i]);
 
     /*
      * The end of the static region needs to be aligned with the
@@ -2580,7 +2752,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
     /* link the first chunk in */
     pcpu_first_chunk = chunk;
-    pcpu_nr_empty_pop_pages[PCPU_CHUNK_ROOT] = pcpu_first_chunk->nr_empty_pop_pages;
+    pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
    pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
     /* include all regions of the first chunk */
@@ -2733,6 +2905,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
      * Related to atom_size, which could be much larger than the unit_size.
      */
     last_allocs = INT_MAX;
+    best_upa = 0;
     for (upa = max_upa; upa; upa--) {
         int allocs = 0, wasted = 0;
 
@@ -2759,6 +2932,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
         last_allocs = allocs;
         best_upa = upa;
     }
+    BUG_ON(!best_upa);
     upa = best_upa;
 
     /* allocate and fill alloc_info */