Merge branch 'for-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
Pull percpu changes from Tejun Heo: "Percpu allocation is now popular enough for the extremely naive range allocator to cause scalability issues. The existing allocator linearly scanned the allocation map on both alloc and free without making use of any hints. Al reimplemented the range allocator so that the free path can use binary search instead of a linear scan, and the alloc path uses simple hinting to avoid scanning in common cases. Combined, the new allocator resolves the scalability issue the percpu allocator was showing during container benchmark workloads." * 'for-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: percpu: renew the max_contig if we merge the head and previous block; percpu: allocation size should be even; percpu: speed alloc_pcpu_area() up; percpu: store offsets instead of lengths in ->map[]; perpcu: fold pcpu_split_block() into the only caller
This commit is contained in:
Коммит
cf6fafcf05
210
mm/percpu.c
210
mm/percpu.c
|
@ -102,10 +102,11 @@ struct pcpu_chunk {
|
||||||
int free_size; /* free bytes in the chunk */
|
int free_size; /* free bytes in the chunk */
|
||||||
int contig_hint; /* max contiguous size hint */
|
int contig_hint; /* max contiguous size hint */
|
||||||
void *base_addr; /* base address of this chunk */
|
void *base_addr; /* base address of this chunk */
|
||||||
int map_used; /* # of map entries used */
|
int map_used; /* # of map entries used before the sentry */
|
||||||
int map_alloc; /* # of map entries allocated */
|
int map_alloc; /* # of map entries allocated */
|
||||||
int *map; /* allocation map */
|
int *map; /* allocation map */
|
||||||
void *data; /* chunk data */
|
void *data; /* chunk data */
|
||||||
|
int first_free; /* no free below this */
|
||||||
bool immutable; /* no [de]population allowed */
|
bool immutable; /* no [de]population allowed */
|
||||||
unsigned long populated[]; /* populated bitmap */
|
unsigned long populated[]; /* populated bitmap */
|
||||||
};
|
};
|
||||||
|
@ -356,11 +357,11 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
|
||||||
{
|
{
|
||||||
int new_alloc;
|
int new_alloc;
|
||||||
|
|
||||||
if (chunk->map_alloc >= chunk->map_used + 2)
|
if (chunk->map_alloc >= chunk->map_used + 3)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
new_alloc = PCPU_DFL_MAP_ALLOC;
|
new_alloc = PCPU_DFL_MAP_ALLOC;
|
||||||
while (new_alloc < chunk->map_used + 2)
|
while (new_alloc < chunk->map_used + 3)
|
||||||
new_alloc *= 2;
|
new_alloc *= 2;
|
||||||
|
|
||||||
return new_alloc;
|
return new_alloc;
|
||||||
|
@ -417,48 +418,6 @@ out_unlock:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* pcpu_split_block - split a map block
|
|
||||||
* @chunk: chunk of interest
|
|
||||||
* @i: index of map block to split
|
|
||||||
* @head: head size in bytes (can be 0)
|
|
||||||
* @tail: tail size in bytes (can be 0)
|
|
||||||
*
|
|
||||||
* Split the @i'th map block into two or three blocks. If @head is
|
|
||||||
* non-zero, @head bytes block is inserted before block @i moving it
|
|
||||||
* to @i+1 and reducing its size by @head bytes.
|
|
||||||
*
|
|
||||||
* If @tail is non-zero, the target block, which can be @i or @i+1
|
|
||||||
* depending on @head, is reduced by @tail bytes and @tail byte block
|
|
||||||
* is inserted after the target block.
|
|
||||||
*
|
|
||||||
* @chunk->map must have enough free slots to accommodate the split.
|
|
||||||
*
|
|
||||||
* CONTEXT:
|
|
||||||
* pcpu_lock.
|
|
||||||
*/
|
|
||||||
static void pcpu_split_block(struct pcpu_chunk *chunk, int i,
|
|
||||||
int head, int tail)
|
|
||||||
{
|
|
||||||
int nr_extra = !!head + !!tail;
|
|
||||||
|
|
||||||
BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra);
|
|
||||||
|
|
||||||
/* insert new subblocks */
|
|
||||||
memmove(&chunk->map[i + nr_extra], &chunk->map[i],
|
|
||||||
sizeof(chunk->map[0]) * (chunk->map_used - i));
|
|
||||||
chunk->map_used += nr_extra;
|
|
||||||
|
|
||||||
if (head) {
|
|
||||||
chunk->map[i + 1] = chunk->map[i] - head;
|
|
||||||
chunk->map[i++] = head;
|
|
||||||
}
|
|
||||||
if (tail) {
|
|
||||||
chunk->map[i++] -= tail;
|
|
||||||
chunk->map[i] = tail;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* pcpu_alloc_area - allocate area from a pcpu_chunk
|
* pcpu_alloc_area - allocate area from a pcpu_chunk
|
||||||
* @chunk: chunk of interest
|
* @chunk: chunk of interest
|
||||||
|
@ -483,19 +442,27 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
|
||||||
int oslot = pcpu_chunk_slot(chunk);
|
int oslot = pcpu_chunk_slot(chunk);
|
||||||
int max_contig = 0;
|
int max_contig = 0;
|
||||||
int i, off;
|
int i, off;
|
||||||
|
bool seen_free = false;
|
||||||
|
int *p;
|
||||||
|
|
||||||
for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) {
|
for (i = chunk->first_free, p = chunk->map + i; i < chunk->map_used; i++, p++) {
|
||||||
bool is_last = i + 1 == chunk->map_used;
|
|
||||||
int head, tail;
|
int head, tail;
|
||||||
|
int this_size;
|
||||||
|
|
||||||
|
off = *p;
|
||||||
|
if (off & 1)
|
||||||
|
continue;
|
||||||
|
|
||||||
/* extra for alignment requirement */
|
/* extra for alignment requirement */
|
||||||
head = ALIGN(off, align) - off;
|
head = ALIGN(off, align) - off;
|
||||||
BUG_ON(i == 0 && head != 0);
|
|
||||||
|
|
||||||
if (chunk->map[i] < 0)
|
this_size = (p[1] & ~1) - off;
|
||||||
continue;
|
if (this_size < head + size) {
|
||||||
if (chunk->map[i] < head + size) {
|
if (!seen_free) {
|
||||||
max_contig = max(chunk->map[i], max_contig);
|
chunk->first_free = i;
|
||||||
|
seen_free = true;
|
||||||
|
}
|
||||||
|
max_contig = max(this_size, max_contig);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -505,44 +472,59 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
|
||||||
* than sizeof(int), which is very small but isn't too
|
* than sizeof(int), which is very small but isn't too
|
||||||
* uncommon for percpu allocations.
|
* uncommon for percpu allocations.
|
||||||
*/
|
*/
|
||||||
if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) {
|
if (head && (head < sizeof(int) || !(p[-1] & 1))) {
|
||||||
if (chunk->map[i - 1] > 0)
|
*p = off += head;
|
||||||
chunk->map[i - 1] += head;
|
if (p[-1] & 1)
|
||||||
else {
|
|
||||||
chunk->map[i - 1] -= head;
|
|
||||||
chunk->free_size -= head;
|
chunk->free_size -= head;
|
||||||
}
|
else
|
||||||
chunk->map[i] -= head;
|
max_contig = max(*p - p[-1], max_contig);
|
||||||
off += head;
|
this_size -= head;
|
||||||
head = 0;
|
head = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if tail is small, just keep it around */
|
/* if tail is small, just keep it around */
|
||||||
tail = chunk->map[i] - head - size;
|
tail = this_size - head - size;
|
||||||
if (tail < sizeof(int))
|
if (tail < sizeof(int)) {
|
||||||
tail = 0;
|
tail = 0;
|
||||||
|
size = this_size - head;
|
||||||
|
}
|
||||||
|
|
||||||
/* split if warranted */
|
/* split if warranted */
|
||||||
if (head || tail) {
|
if (head || tail) {
|
||||||
pcpu_split_block(chunk, i, head, tail);
|
int nr_extra = !!head + !!tail;
|
||||||
|
|
||||||
|
/* insert new subblocks */
|
||||||
|
memmove(p + nr_extra + 1, p + 1,
|
||||||
|
sizeof(chunk->map[0]) * (chunk->map_used - i));
|
||||||
|
chunk->map_used += nr_extra;
|
||||||
|
|
||||||
if (head) {
|
if (head) {
|
||||||
i++;
|
if (!seen_free) {
|
||||||
off += head;
|
chunk->first_free = i;
|
||||||
max_contig = max(chunk->map[i - 1], max_contig);
|
seen_free = true;
|
||||||
|
}
|
||||||
|
*++p = off += head;
|
||||||
|
++i;
|
||||||
|
max_contig = max(head, max_contig);
|
||||||
|
}
|
||||||
|
if (tail) {
|
||||||
|
p[1] = off + size;
|
||||||
|
max_contig = max(tail, max_contig);
|
||||||
}
|
}
|
||||||
if (tail)
|
|
||||||
max_contig = max(chunk->map[i + 1], max_contig);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!seen_free)
|
||||||
|
chunk->first_free = i + 1;
|
||||||
|
|
||||||
/* update hint and mark allocated */
|
/* update hint and mark allocated */
|
||||||
if (is_last)
|
if (i + 1 == chunk->map_used)
|
||||||
chunk->contig_hint = max_contig; /* fully scanned */
|
chunk->contig_hint = max_contig; /* fully scanned */
|
||||||
else
|
else
|
||||||
chunk->contig_hint = max(chunk->contig_hint,
|
chunk->contig_hint = max(chunk->contig_hint,
|
||||||
max_contig);
|
max_contig);
|
||||||
|
|
||||||
chunk->free_size -= chunk->map[i];
|
chunk->free_size -= size;
|
||||||
chunk->map[i] = -chunk->map[i];
|
*p |= 1;
|
||||||
|
|
||||||
pcpu_chunk_relocate(chunk, oslot);
|
pcpu_chunk_relocate(chunk, oslot);
|
||||||
return off;
|
return off;
|
||||||
|
@ -570,34 +552,50 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
|
||||||
static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
|
static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
|
||||||
{
|
{
|
||||||
int oslot = pcpu_chunk_slot(chunk);
|
int oslot = pcpu_chunk_slot(chunk);
|
||||||
int i, off;
|
int off = 0;
|
||||||
|
unsigned i, j;
|
||||||
|
int to_free = 0;
|
||||||
|
int *p;
|
||||||
|
|
||||||
for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++]))
|
freeme |= 1; /* we are searching for <given offset, in use> pair */
|
||||||
if (off == freeme)
|
|
||||||
break;
|
i = 0;
|
||||||
|
j = chunk->map_used;
|
||||||
|
while (i != j) {
|
||||||
|
unsigned k = (i + j) / 2;
|
||||||
|
off = chunk->map[k];
|
||||||
|
if (off < freeme)
|
||||||
|
i = k + 1;
|
||||||
|
else if (off > freeme)
|
||||||
|
j = k;
|
||||||
|
else
|
||||||
|
i = j = k;
|
||||||
|
}
|
||||||
BUG_ON(off != freeme);
|
BUG_ON(off != freeme);
|
||||||
BUG_ON(chunk->map[i] > 0);
|
|
||||||
|
|
||||||
chunk->map[i] = -chunk->map[i];
|
if (i < chunk->first_free)
|
||||||
chunk->free_size += chunk->map[i];
|
chunk->first_free = i;
|
||||||
|
|
||||||
|
p = chunk->map + i;
|
||||||
|
*p = off &= ~1;
|
||||||
|
chunk->free_size += (p[1] & ~1) - off;
|
||||||
|
|
||||||
/* merge with previous? */
|
|
||||||
if (i > 0 && chunk->map[i - 1] >= 0) {
|
|
||||||
chunk->map[i - 1] += chunk->map[i];
|
|
||||||
chunk->map_used--;
|
|
||||||
memmove(&chunk->map[i], &chunk->map[i + 1],
|
|
||||||
(chunk->map_used - i) * sizeof(chunk->map[0]));
|
|
||||||
i--;
|
|
||||||
}
|
|
||||||
/* merge with next? */
|
/* merge with next? */
|
||||||
if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) {
|
if (!(p[1] & 1))
|
||||||
chunk->map[i] += chunk->map[i + 1];
|
to_free++;
|
||||||
chunk->map_used--;
|
/* merge with previous? */
|
||||||
memmove(&chunk->map[i + 1], &chunk->map[i + 2],
|
if (i > 0 && !(p[-1] & 1)) {
|
||||||
(chunk->map_used - (i + 1)) * sizeof(chunk->map[0]));
|
to_free++;
|
||||||
|
i--;
|
||||||
|
p--;
|
||||||
|
}
|
||||||
|
if (to_free) {
|
||||||
|
chunk->map_used -= to_free;
|
||||||
|
memmove(p + 1, p + 1 + to_free,
|
||||||
|
(chunk->map_used - i) * sizeof(chunk->map[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
chunk->contig_hint = max(chunk->map[i], chunk->contig_hint);
|
chunk->contig_hint = max(chunk->map[i + 1] - chunk->map[i] - 1, chunk->contig_hint);
|
||||||
pcpu_chunk_relocate(chunk, oslot);
|
pcpu_chunk_relocate(chunk, oslot);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -617,7 +615,9 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
chunk->map_alloc = PCPU_DFL_MAP_ALLOC;
|
chunk->map_alloc = PCPU_DFL_MAP_ALLOC;
|
||||||
chunk->map[chunk->map_used++] = pcpu_unit_size;
|
chunk->map[0] = 0;
|
||||||
|
chunk->map[1] = pcpu_unit_size | 1;
|
||||||
|
chunk->map_used = 1;
|
||||||
|
|
||||||
INIT_LIST_HEAD(&chunk->list);
|
INIT_LIST_HEAD(&chunk->list);
|
||||||
chunk->free_size = pcpu_unit_size;
|
chunk->free_size = pcpu_unit_size;
|
||||||
|
@ -713,6 +713,16 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
void __percpu *ptr;
|
void __percpu *ptr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We want the lowest bit of offset available for in-use/free
|
||||||
|
* indicator, so force >= 16bit alignment and make size even.
|
||||||
|
*/
|
||||||
|
if (unlikely(align < 2))
|
||||||
|
align = 2;
|
||||||
|
|
||||||
|
if (unlikely(size & 1))
|
||||||
|
size++;
|
||||||
|
|
||||||
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
|
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
|
||||||
WARN(true, "illegal size (%zu) or align (%zu) for "
|
WARN(true, "illegal size (%zu) or align (%zu) for "
|
||||||
"percpu allocation\n", size, align);
|
"percpu allocation\n", size, align);
|
||||||
|
@ -1343,9 +1353,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
|
||||||
}
|
}
|
||||||
schunk->contig_hint = schunk->free_size;
|
schunk->contig_hint = schunk->free_size;
|
||||||
|
|
||||||
schunk->map[schunk->map_used++] = -ai->static_size;
|
schunk->map[0] = 1;
|
||||||
|
schunk->map[1] = ai->static_size;
|
||||||
|
schunk->map_used = 1;
|
||||||
if (schunk->free_size)
|
if (schunk->free_size)
|
||||||
schunk->map[schunk->map_used++] = schunk->free_size;
|
schunk->map[++schunk->map_used] = 1 | (ai->static_size + schunk->free_size);
|
||||||
|
else
|
||||||
|
schunk->map[1] |= 1;
|
||||||
|
|
||||||
/* init dynamic chunk if necessary */
|
/* init dynamic chunk if necessary */
|
||||||
if (dyn_size) {
|
if (dyn_size) {
|
||||||
|
@ -1358,8 +1372,10 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
|
||||||
bitmap_fill(dchunk->populated, pcpu_unit_pages);
|
bitmap_fill(dchunk->populated, pcpu_unit_pages);
|
||||||
|
|
||||||
dchunk->contig_hint = dchunk->free_size = dyn_size;
|
dchunk->contig_hint = dchunk->free_size = dyn_size;
|
||||||
dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
|
dchunk->map[0] = 1;
|
||||||
dchunk->map[dchunk->map_used++] = dchunk->free_size;
|
dchunk->map[1] = pcpu_reserved_chunk_limit;
|
||||||
|
dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1;
|
||||||
|
dchunk->map_used = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* link the first chunk in */
|
/* link the first chunk in */
|
||||||
|
|
Загрузка…
Ссылка в новой задаче