diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index d3a38d687104..b8b9084527b1 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -44,6 +44,16 @@
 /* minimum unit size, also is the maximum supported allocation size */
 #define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(64 << 10)
 
+/*
+ * Percpu allocator can serve percpu allocations before slab is
+ * initialized which allows slab to depend on the percpu allocator.
+ * The following two parameters decide how much resource to
+ * preallocate for this.  Keep PERCPU_DYNAMIC_RESERVE equal to or
+ * larger than PERCPU_DYNAMIC_EARLY_SIZE.
+ */
+#define PERCPU_DYNAMIC_EARLY_SLOTS	128
+#define PERCPU_DYNAMIC_EARLY_SIZE	(12 << 10)
+
 /*
  * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
  * back on the first chunk for dynamic percpu allocation if arch is
@@ -104,16 +114,11 @@ extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
 							     int nr_units);
 extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai);
 
-extern struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, ssize_t dyn_size,
-				size_t atom_size,
-				pcpu_fc_cpu_distance_fn_t cpu_distance_fn);
-
 extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 					 void *base_addr);
 
 #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
-extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
 				pcpu_fc_alloc_fn_t alloc_fn,
@@ -140,6 +145,7 @@ extern bool is_kernel_percpu_address(unsigned long addr);
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void __init setup_per_cpu_areas(void);
 #endif
+extern void __init percpu_init_late(void);
 
 #else /* CONFIG_SMP */
 
@@ -153,6 +159,8 @@ static inline bool is_kernel_percpu_address(unsigned long addr)
 
 static inline void __init setup_per_cpu_areas(void) { }
 
+static inline void __init percpu_init_late(void) { }
+
 static inline void *pcpu_lpage_remapped(void *kaddr)
 {
 	return NULL;
diff --git a/init/main.c b/init/main.c
index a42fdf4aeba9..4ddb53f04f2a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -532,6 +532,7 @@ static void __init mm_init(void)
 	page_cgroup_init_flatmem();
 	mem_init();
 	kmem_cache_init();
+	percpu_init_late();
 	pgtable_cache_init();
 	vmalloc_init();
 }
diff --git a/mm/percpu.c b/mm/percpu.c
index 6470e7710231..e61dc2cc5873 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -282,6 +282,9 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
  */
 static void *pcpu_mem_alloc(size_t size)
 {
+	if (WARN_ON_ONCE(!slab_is_available()))
+		return NULL;
+
 	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_KERNEL);
 	else {
@@ -392,13 +395,6 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
 	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
 	memcpy(new, chunk->map, old_size);
 
-	/*
-	 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
-	 * one of the first chunks and still using static map.
-	 */
-	if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-		old = chunk->map;
-
 	chunk->map_alloc = new_alloc;
 	chunk->map = new;
 	new = NULL;
@@ -604,7 +600,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
 	struct pcpu_chunk *chunk;
 
-	chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL);
+	chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
 	if (!chunk)
 		return NULL;
 
@@ -1013,20 +1009,6 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 		return page_to_phys(pcpu_addr_to_page(addr));
 }
 
-static inline size_t pcpu_calc_fc_sizes(size_t static_size,
-					size_t reserved_size,
-					ssize_t *dyn_sizep)
-{
-	size_t size_sum;
-
-	size_sum = PFN_ALIGN(static_size + reserved_size +
-			     (*dyn_sizep >= 0 ? *dyn_sizep : 0));
-	if (*dyn_sizep != 0)
-		*dyn_sizep = size_sum - static_size - reserved_size;
-
-	return size_sum;
-}
-
 /**
  * pcpu_alloc_alloc_info - allocate percpu allocation info
  * @nr_groups: the number of groups
@@ -1085,7 +1067,7 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 /**
  * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
  * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  *
@@ -1103,8 +1085,8 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
  * On success, pointer to the new allocation_info is returned.  On
  * failure, ERR_PTR value is returned.
  */
-struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, ssize_t dyn_size,
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+				size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
 {
@@ -1123,13 +1105,17 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 	memset(group_map, 0, sizeof(group_map));
 	memset(group_cnt, 0, sizeof(group_cnt));
 
+	/* calculate size_sum and ensure dyn_size is enough for early alloc */
+	size_sum = PFN_ALIGN(static_size + reserved_size +
+			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+	dyn_size = size_sum - static_size - reserved_size;
+
 	/*
 	 * Determine min_unit_size, alloc_size and max_upa such that
 	 * alloc_size is multiple of atom_size and is the smallest
 	 * which can accomodate 4k aligned segments which are equal to
 	 * or larger than min_unit_size.
 	 */
-	size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
 	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
 
 	alloc_size = roundup(min_unit_size, atom_size);
@@ -1350,7 +1336,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 				  void *base_addr)
 {
 	static char cpus_buf[4096] __initdata;
-	static int smap[2], dmap[2];
+	static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
+	static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
 	size_t dyn_size = ai->dyn_size;
 	size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
 	struct pcpu_chunk *schunk, *dchunk = NULL;
@@ -1373,14 +1360,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	} while (0)
 
 	/* sanity checks */
-	BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
-		     ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
 	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
 	PCPU_SETUP_BUG_ON(!ai->static_size);
 	PCPU_SETUP_BUG_ON(!base_addr);
 	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
 	PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
 	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
+	PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
 	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
 	/* process group information and build config tables accordingly */
@@ -1532,7 +1518,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
 * @alloc_fn: function to allocate percpu page
@@ -1553,10 +1539,7 @@
  * vmalloc space is not orders of magnitude larger than distances
  * between node memory addresses (ie. 32bit NUMA machines).
  *
- * When @dyn_size is positive, dynamic area might be larger than
- * specified to fill page alignment.  When @dyn_size is auto,
- * @dyn_size is just big enough to fill page alignment after static
- * and reserved areas.
+ * @dyn_size specifies the minimum dynamic area size.
  *
  * If the needed size is smaller than the minimum or specified unit
  * size, the leftover is returned using @free_fn.
@@ -1564,7 +1547,7 @@
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				  size_t atom_size,
 				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
 				  pcpu_fc_alloc_fn_t alloc_fn,
@@ -1695,7 +1678,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 
 	snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
 
-	ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL);
+	ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
 	if (IS_ERR(ai))
 		return PTR_ERR(ai);
 	BUG_ON(ai->nr_groups != 1);
@@ -1821,3 +1804,33 @@ void __init setup_per_cpu_areas(void)
 		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
 }
 #endif	/* CONFIG_HAVE_SETUP_PER_CPU_AREA */
+
+/*
+ * First and reserved chunks are initialized with temporary allocation
+ * map in initdata so that they can be used before slab is online.
+ * This function is called after slab is brought up and replaces those
+ * with properly allocated maps.
+ */
+void __init percpu_init_late(void)
+{
+	struct pcpu_chunk *target_chunks[] =
+		{ pcpu_first_chunk, pcpu_reserved_chunk, NULL };
+	struct pcpu_chunk *chunk;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; (chunk = target_chunks[i]); i++) {
+		int *map;
+		const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);
+
+		BUILD_BUG_ON(size > PAGE_SIZE);
+
+		map = pcpu_mem_alloc(size);
+		BUG_ON(!map);
+
+		spin_lock_irqsave(&pcpu_lock, flags);
+		memcpy(map, chunk->map, size);
+		chunk->map = map;
+		spin_unlock_irqrestore(&pcpu_lock, flags);
+	}
+}
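
A note on the pcpu_mem_alloc() hunk: the WARN_ON_ONCE(!slab_is_available()) check turns a too-early allocation into a loud NULL return instead of a crash inside a not-yet-initialized kzalloc()/vzalloc(). The following userspace sketch shows only the shape of that guard; the flag, mem_alloc() and the message are stand-ins invented for illustration, not kernel API.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool allocator_online;		/* slab_is_available() stand-in */

static void *mem_alloc(size_t size)
{
	/* like WARN_ON_ONCE(!slab_is_available()): complain once and
	 * fail gracefully rather than crash in the backing allocator */
	if (!allocator_online) {
		fprintf(stderr, "warning: mem_alloc() before allocator is online\n");
		return NULL;
	}
	return calloc(1, size);		/* kzalloc() stand-in */
}

int main(void)
{
	void *p = mem_alloc(64);	/* too early: warns, returns NULL */

	if (!p) {
		allocator_online = true;
		p = mem_alloc(64);	/* now succeeds */
	}
	free(p);
	return 0;
}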
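The sizing hunk in pcpu_build_alloc_info() folds the removed pcpu_calc_fc_sizes() and the old ssize_t "-1 for auto" convention into two lines of plain size_t arithmetic: @dyn_size is clamped up to PERCPU_DYNAMIC_EARLY_SIZE, the total is page-aligned, and the alignment padding is handed back to the dynamic area. A minimal userspace sketch of that arithmetic, with page_align() standing in for PFN_ALIGN() and the input sizes made up for illustration:

#include <stdio.h>
#include <stddef.h>

#define EARLY_SIZE	(12 << 10)	/* PERCPU_DYNAMIC_EARLY_SIZE */

/* PFN_ALIGN() stand-in, assuming 4K pages */
static size_t page_align(size_t v)
{
	return (v + 4095) & ~(size_t)4095;
}

int main(void)
{
	size_t static_size = 44000, reserved_size = 8192, dyn_size = 20480;
	size_t size_sum;

	/* dyn_size is now a minimum: clamp it up to EARLY_SIZE, page-align
	 * the total, then let dyn_size absorb the alignment padding */
	size_sum = page_align(static_size + reserved_size +
			      (dyn_size > EARLY_SIZE ? dyn_size : EARLY_SIZE));
	dyn_size = size_sum - static_size - reserved_size;

	printf("size_sum=%zu dyn_size=%zu\n", size_sum, dyn_size);
	return 0;
}

With these example inputs the 1056-byte alignment pad ends up in the dynamic area (20480 -> 21536), which is why the updated doc comments describe @dyn_size as a minimum rather than an exact size, and why callers such as pcpu_page_first_chunk() now pass 0 where they used to pass -1.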
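Finally, percpu_init_late() is the handoff that lets the enlarged smap[]/dmap[] bootstrap maps live in __initdata: once kmem_cache_init() has run (the new call in mm_init()), each map is copied into properly allocated memory and the chunk pointer is switched under pcpu_lock. Below is a compressed userspace analogue of the pattern, assuming a pthread mutex in place of the pcpu_lock spinlock and malloc() in place of pcpu_mem_alloc(); all names here are illustrative, not kernel code.

#include <assert.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#define EARLY_SLOTS 128			/* PERCPU_DYNAMIC_EARLY_SLOTS */

static int bootstrap_map[EARLY_SLOTS];	/* smap[]/dmap[] stand-in */
static int *cur_map = bootstrap_map;	/* chunk->map */
static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

/* Once the real allocator is up, copy the bootstrap map into properly
 * allocated memory and flip the pointer under the lock, so concurrent
 * users never observe a half-copied map. */
static void init_late(void)
{
	int *map = malloc(EARLY_SLOTS * sizeof(*map));	/* pcpu_mem_alloc() */

	assert(map);					/* BUG_ON(!map) */
	pthread_mutex_lock(&map_lock);			/* pcpu_lock */
	memcpy(map, cur_map, EARLY_SLOTS * sizeof(*map));
	cur_map = map;
	pthread_mutex_unlock(&map_lock);
}

int main(void)
{
	bootstrap_map[0] = 42;	/* state recorded by an early allocation */
	init_late();		/* called from mm_init() in the patch */
	assert(cur_map != bootstrap_map && cur_map[0] == 42);
	free(cur_map);
	return 0;
}

Note that, as in the patch, the allocation happens outside the lock: pcpu_mem_alloc() allocates with GFP_KERNEL and may sleep, while pcpu_lock is a spinlock. The old bootstrap arrays need no explicit freeing since __initdata is reclaimed wholesale once boot finishes.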