Merge branch 'slub-tiny-v1r6' into slab/for-next

Merge my series [1] to deprecate the SLOB allocator.
- Renames CONFIG_SLOB to CONFIG_SLOB_DEPRECATED with a deprecation notice.
- The recommended replacement is CONFIG_SLUB, optionally with the new
  CONFIG_SLUB_TINY tweaks for systems with 16MB or less RAM.
- Use cases that stopped working with CONFIG_SLUB_TINY instead of SLOB
  should be reported to linux-mm@kvack.org and the slab maintainers;
  otherwise SLOB will be removed in a few cycles.

[1] https://lore.kernel.org/all/20221121171202.22080-1-vbabka@suse.cz/
Vlastimil Babka 2022-11-23 15:41:16 +01:00
Parents: 6176665213 149b6fa228
Commit: dc19745ad0
27 changed files with 397 additions and 171 deletions
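The arch defconfig updates below all apply the same replacement; shown here as
an illustrative fragment mirroring those hunks:

  # before (SLOB, now deprecated):
  CONFIG_SLOB=y
  # after:
  CONFIG_SLUB=y
  CONFIG_SLUB_TINY=y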


@ -14,7 +14,8 @@ CONFIG_ARCH_EDB7211=y
CONFIG_ARCH_P720T=y
CONFIG_AEABI=y
# CONFIG_COREDUMP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y


@ -13,7 +13,8 @@ CONFIG_CMDLINE="noinitrd root=/dev/mtdblock2 rootfstype=jffs2 fbcon=rotate:1"
CONFIG_FPE_NWFPE=y
CONFIG_PM=y
# CONFIG_SWAP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y


@ -25,7 +25,8 @@ CONFIG_ARM_CLPS711X_CPUIDLE=y
CONFIG_JUMP_LABEL=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_COREDUMP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y


@ -42,7 +42,8 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_BINFMT_MISC=y
# CONFIG_SWAP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_NET=y
CONFIG_PACKET=y


@ -49,7 +49,8 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_LDM_PARTITION=y
CONFIG_CMDLINE_PARTITION=y
CONFIG_BINFMT_MISC=y
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_COMPACTION is not set
CONFIG_NET=y
CONFIG_PACKET=y


@ -19,7 +19,8 @@ CONFIG_FPE_NWFPE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_SWAP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y


@ -26,7 +26,8 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
# CONFIG_BLOCK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_NET=y


@ -10,7 +10,8 @@ CONFIG_EXPERT=y
# CONFIG_AIO is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_MODULES=y
# CONFIG_BLOCK is not set
CONFIG_OPENRISC_BUILTIN_DTB="or1ksim"


@ -16,7 +16,8 @@ CONFIG_EXPERT=y
# CONFIG_AIO is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_MODULES=y
# CONFIG_BLOCK is not set
CONFIG_OPENRISC_BUILTIN_DTB="simple_smp"


@ -25,7 +25,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_NONPORTABLE=y


@ -17,7 +17,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_NONPORTABLE=y


@ -22,7 +22,8 @@ CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_VIRT=y
CONFIG_NONPORTABLE=y


@ -10,7 +10,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_AIO is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set


@ -11,7 +11,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set


@ -21,7 +21,8 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_ELF_CORE is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y


@ -9,7 +9,8 @@ CONFIG_LOG_BUF_SHIFT=14
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CPU_SUBTYPE_SH7706=y
CONFIG_MEMORY_START=0x0c000000


@ -20,7 +20,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_KALLSYMS_ALL=y
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_KPROBES=y
CONFIG_MODULES=y


@ -129,7 +129,11 @@
/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
#ifndef CONFIG_SLUB_TINY
#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U)
#else
#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0)
#endif
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
/*
@ -336,12 +340,17 @@ enum kmalloc_cache_type {
#endif
#ifndef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP = KMALLOC_NORMAL,
#else
KMALLOC_CGROUP,
#endif
#ifdef CONFIG_SLUB_TINY
KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
KMALLOC_RECLAIM,
#endif
#ifdef CONFIG_ZONE_DMA
KMALLOC_DMA,
#endif
#ifdef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP,
#endif
NR_KMALLOC_TYPES
};
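The enum change above is the core space-saving trick: with CONFIG_SLUB_TINY,
KMALLOC_RECLAIM becomes an alias for KMALLOC_NORMAL, so no separate
"kmalloc-rcl-*" caches are created and a comparison against a distinct
KMALLOC_RECLAIM value (as in new_kmalloc_cache() further down) resolves at
compile time. A minimal standalone sketch of the idiom, not kernel code; the
macro here only stands in for the real Kconfig symbol:

#include <stdio.h>

#define CONFIG_SLUB_TINY 1	/* comment out to get the non-TINY layout */

enum kmalloc_cache_type {
	KMALLOC_NORMAL = 0,
#ifdef CONFIG_SLUB_TINY
	KMALLOC_RECLAIM = KMALLOC_NORMAL,	/* alias: no separate reclaim caches */
#else
	KMALLOC_RECLAIM,			/* distinct reclaimable caches */
#endif
	NR_KMALLOC_TYPES
};

int main(void)
{
	/*
	 * With the alias in place this condition is constant-false, so a
	 * branch guarded by it disappears from the generated code.
	 */
	if (KMALLOC_RECLAIM != KMALLOC_NORMAL)
		printf("reclaimable kmalloc caches are separate\n");

	printf("NR_KMALLOC_TYPES = %d\n", (int)NR_KMALLOC_TYPES);
	return 0;
}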


@ -80,8 +80,10 @@ struct kmem_cache {
unsigned int *random_seq;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
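For context, the useroffset/usersize pair that is now compiled out when
CONFIG_HARDENED_USERCOPY is disabled describes the usercopy whitelist passed
to kmem_cache_create_usercopy(). A hedged usage sketch; struct my_obj, its
buf field and the cache name are made up for illustration:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/stddef.h>

/* Hypothetical object with one field that may be copied to/from userspace. */
struct my_obj {
	int flags;
	char buf[64];
};

static struct kmem_cache *my_obj_cache;

static int __init my_obj_cache_init(void)
{
	/* Only the buf field is whitelisted for hardened usercopy checks. */
	my_obj_cache = kmem_cache_create_usercopy("my_obj",
			sizeof(struct my_obj), 0, SLAB_HWCACHE_ALIGN,
			offsetof(struct my_obj, buf),		/* useroffset */
			sizeof_field(struct my_obj, buf),	/* usersize */
			NULL);					/* no constructor */
	return my_obj_cache ? 0 : -ENOMEM;
}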


@ -41,6 +41,7 @@ enum stat_item {
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS };
#ifndef CONFIG_SLUB_TINY
/*
* When changing the layout, make sure freelist and tid are still compatible
* with this_cpu_cmpxchg_double() alignment requirements.
@ -57,6 +58,7 @@ struct kmem_cache_cpu {
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
#endif /* CONFIG_SLUB_TINY */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
@ -88,7 +90,9 @@ struct kmem_cache_order_objects {
* Slab cache management.
*/
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
#endif
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
@ -136,13 +140,15 @@ struct kmem_cache {
struct kasan_cache kasan_info;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
#ifdef CONFIG_SYSFS
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *);
void sysfs_slab_release(struct kmem_cache *);


@ -7,5 +7,6 @@ CONFIG_KERNEL_XZ=y
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
# CONFIG_SLAB is not set
# CONFIG_SLUB is not set
CONFIG_SLOB=y
# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y


@ -37,7 +37,7 @@ menuconfig KASAN
(HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
HAVE_ARCH_KASAN_HW_TAGS
depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB)
select STACKDEPOT_ALWAYS_INIT
help
Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety


@ -219,17 +219,43 @@ config SLUB
and has enhanced diagnostics. SLUB is the default choice for
a slab allocator.
config SLOB
config SLOB_DEPRECATED
depends on EXPERT
bool "SLOB (Simple Allocator)"
bool "SLOB (Simple Allocator - DEPRECATED)"
depends on !PREEMPT_RT
help
Deprecated and scheduled for removal in a few cycles. SLUB
recommended as replacement. CONFIG_SLUB_TINY can be considered
on systems with 16MB or less RAM.
If you need SLOB to stay, please contact linux-mm@kvack.org and
people listed in the SLAB ALLOCATOR section of MAINTAINERS file,
with your use case.
SLOB replaces the stock allocator with a drastically simpler
allocator. SLOB is generally more space efficient but
does not perform as well on large systems.
endchoice
config SLOB
bool
default y
depends on SLOB_DEPRECATED
config SLUB_TINY
bool "Configure SLUB for minimal memory footprint"
depends on SLUB && EXPERT
select SLAB_MERGE_DEFAULT
help
Configures the SLUB allocator in a way to achieve minimal memory
footprint, sacrificing scalability, debugging and other features.
This is intended only for the smallest system that had used the
SLOB allocator and is not recommended for systems with more than
16MB RAM.
If unsure, say N.
config SLAB_MERGE_DEFAULT
bool "Allow slab caches to be merged"
default y
@ -247,7 +273,7 @@ config SLAB_MERGE_DEFAULT
config SLAB_FREELIST_RANDOM
bool "Randomize slab freelist"
depends on SLAB || SLUB
depends on SLAB || (SLUB && !SLUB_TINY)
help
Randomizes the freelist order used on creating new pages. This
security feature reduces the predictability of the kernel slab
@ -255,7 +281,7 @@ config SLAB_FREELIST_RANDOM
config SLAB_FREELIST_HARDENED
bool "Harden slab freelist metadata"
depends on SLAB || SLUB
depends on SLAB || (SLUB && !SLUB_TINY)
help
Many kernel heap attacks try to target slab cache metadata and
other infrastructure. This options makes minor performance
@ -267,7 +293,7 @@ config SLAB_FREELIST_HARDENED
config SLUB_STATS
default n
bool "Enable SLUB performance statistics"
depends on SLUB && SYSFS
depends on SLUB && SYSFS && !SLUB_TINY
help
SLUB statistics are useful to debug SLUBs allocation behavior in
order find ways to optimize the allocator. This should never be
@ -279,7 +305,7 @@ config SLUB_STATS
config SLUB_CPU_PARTIAL
default y
depends on SLUB && SMP
depends on SLUB && SMP && !SLUB_TINY
bool "SLUB per cpu partial cache"
help
Per cpu partial caches accelerate objects allocation and freeing


@ -56,7 +56,7 @@ config DEBUG_SLAB
config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
depends on SLUB && SYSFS
depends on SLUB && SYSFS && !SLUB_TINY
select STACKDEPOT if STACKTRACE_SUPPORT
help
SLUB has extensive debug support features. Disabling these can


@ -217,8 +217,6 @@ struct kmem_cache {
unsigned int size; /* The aligned/padded/added on size */
unsigned int align; /* Alignment as calculated */
slab_flags_t flags; /* Active flags on the slab */
unsigned int useroffset;/* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
const char *name; /* Slab name for sysfs */
int refcount; /* Use counter */
void (*ctor)(void *); /* Called on object slot creation */


@ -143,8 +143,10 @@ int slab_unmergeable(struct kmem_cache *s)
if (s->ctor)
return 1;
#ifdef CONFIG_HARDENED_USERCOPY
if (s->usersize)
return 1;
#endif
/*
* We may have set a slab to be unmergeable during bootstrap.
@ -223,8 +225,10 @@ static struct kmem_cache *create_cache(const char *name,
s->size = s->object_size = object_size;
s->align = align;
s->ctor = ctor;
#ifdef CONFIG_HARDENED_USERCOPY
s->useroffset = useroffset;
s->usersize = usersize;
#endif
err = __kmem_cache_create(s, flags);
if (err)
@ -317,7 +321,8 @@ kmem_cache_create_usercopy(const char *name,
flags &= CACHE_CREATE_MASK;
/* Fail closed on bad usersize of useroffset values. */
if (WARN_ON(!usersize && useroffset) ||
if (!IS_ENABLED(CONFIG_HARDENED_USERCOPY) ||
WARN_ON(!usersize && useroffset) ||
WARN_ON(size < usersize || size - usersize < useroffset))
usersize = useroffset = 0;
@ -595,8 +600,8 @@ void kmem_dump_obj(void *object)
ptroffset = ((char *)object - (char *)kp.kp_objp) - kp.kp_data_offset;
pr_cont(" pointer offset %lu", ptroffset);
}
if (kp.kp_slab_cache && kp.kp_slab_cache->usersize)
pr_cont(" size %u", kp.kp_slab_cache->usersize);
if (kp.kp_slab_cache && kp.kp_slab_cache->object_size)
pr_cont(" size %u", kp.kp_slab_cache->object_size);
if (kp.kp_ret)
pr_cont(" allocated at %pS\n", kp.kp_ret);
else
@ -640,8 +645,10 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
align = max(align, size);
s->align = calculate_alignment(flags, align, size);
#ifdef CONFIG_HARDENED_USERCOPY
s->useroffset = useroffset;
s->usersize = usersize;
#endif
err = __kmem_cache_create(s, flags);
@ -766,10 +773,16 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
#define KMALLOC_CGROUP_NAME(sz)
#endif
#ifndef CONFIG_SLUB_TINY
#define KMALLOC_RCL_NAME(sz) .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #sz,
#else
#define KMALLOC_RCL_NAME(sz)
#endif
#define INIT_KMALLOC_INFO(__size, __short_size) \
{ \
.name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \
.name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \
KMALLOC_RCL_NAME(__short_size) \
KMALLOC_CGROUP_NAME(__short_size) \
KMALLOC_DMA_NAME(__short_size) \
.size = __size, \
@ -855,7 +868,7 @@ void __init setup_kmalloc_cache_index_table(void)
static void __init
new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
{
if (type == KMALLOC_RECLAIM) {
if ((KMALLOC_RECLAIM != KMALLOC_NORMAL) && (type == KMALLOC_RECLAIM)) {
flags |= SLAB_RECLAIM_ACCOUNT;
} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
if (mem_cgroup_kmem_disabled()) {

mm/slub.c (422 changed lines)

@ -187,6 +187,12 @@ do { \
#define USE_LOCKLESS_FAST_PATH() (false)
#endif
#ifndef CONFIG_SLUB_TINY
#define __fastpath_inline __always_inline
#else
#define __fastpath_inline
#endif
#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
@ -241,6 +247,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG
#ifndef CONFIG_SLUB_TINY
/*
* Minimum number of partial slabs. These will be left on the partial
* lists even if they are empty. kmem_cache_shrink may reclaim them.
@ -253,6 +260,10 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
* sort the partial list by the number of objects in use.
*/
#define MAX_PARTIAL 10
#else
#define MIN_PARTIAL 0
#define MAX_PARTIAL 0
#endif
#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
SLAB_POISON | SLAB_STORE_USER)
@ -298,7 +309,7 @@ struct track {
enum track_item { TRACK_ALLOC, TRACK_FREE };
#ifdef CONFIG_SYSFS
#ifdef SLAB_SUPPORTS_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
#else
@ -332,10 +343,12 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
*/
static nodemask_t slab_nodes;
#ifndef CONFIG_SLUB_TINY
/*
* Workqueue used for flush_cpu_slab().
*/
static struct workqueue_struct *flushwq;
#endif
/********************************************************************
* Core slab cache functions
@ -381,10 +394,12 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object)
return freelist_dereference(s, object + s->offset);
}
#ifndef CONFIG_SLUB_TINY
static void prefetch_freepointer(const struct kmem_cache *s, void *object)
{
prefetchw(object + s->offset);
}
#endif
/*
* When running under KMSAN, get_freepointer_safe() may return an uninitialized
@ -1402,7 +1417,7 @@ static inline int alloc_consistency_checks(struct kmem_cache *s,
return 1;
}
static noinline int alloc_debug_processing(struct kmem_cache *s,
static noinline bool alloc_debug_processing(struct kmem_cache *s,
struct slab *slab, void *object, int orig_size)
{
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
@ -1414,7 +1429,7 @@ static noinline int alloc_debug_processing(struct kmem_cache *s,
trace(s, slab, object, 1);
set_orig_size(s, object, orig_size);
init_object(s, object, SLUB_RED_ACTIVE);
return 1;
return true;
bad:
if (folio_test_slab(slab_folio(slab))) {
@ -1427,7 +1442,7 @@ bad:
slab->inuse = slab->objects;
slab->freelist = NULL;
}
return 0;
return false;
}
static inline int free_consistency_checks(struct kmem_cache *s,
@ -1680,17 +1695,17 @@ static inline void setup_object_debug(struct kmem_cache *s, void *object) {}
static inline
void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
static inline int alloc_debug_processing(struct kmem_cache *s,
struct slab *slab, void *object, int orig_size) { return 0; }
static inline bool alloc_debug_processing(struct kmem_cache *s,
struct slab *slab, void *object, int orig_size) { return true; }
static inline void free_debug_processing(
struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr) {}
static inline bool free_debug_processing(struct kmem_cache *s,
struct slab *slab, void *head, void *tail, int *bulk_cnt,
unsigned long addr, depot_stack_handle_t handle) { return true; }
static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
static inline int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val) { return 1; }
static inline depot_stack_handle_t set_track_prepare(void) { return 0; }
static inline void set_track(struct kmem_cache *s, void *object,
enum track_item alloc, unsigned long addr) {}
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
@ -1715,11 +1730,13 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
#ifndef CONFIG_SLUB_TINY
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
void **freelist, void *nextfree)
{
return false;
}
#endif
#endif /* CONFIG_SLUB_DEBUG */
/*
@ -2257,7 +2274,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
if (!pfmemalloc_match(slab, pc->flags))
continue;
if (kmem_cache_debug(s)) {
if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
object = alloc_single_from_partial(s, n, slab,
pc->orig_size);
if (object)
@ -2372,6 +2389,8 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context
return get_any_partial(s, pc);
}
#ifndef CONFIG_SLUB_TINY
#ifdef CONFIG_PREEMPTION
/*
* Calculate the next globally unique transaction for disambiguation
@ -2385,7 +2404,7 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context
* different cpus.
*/
#define TID_STEP 1
#endif
#endif /* CONFIG_PREEMPTION */
static inline unsigned long next_tid(unsigned long tid)
{
@ -2834,6 +2853,13 @@ static int slub_cpu_dead(unsigned int cpu)
return 0;
}
#else /* CONFIG_SLUB_TINY */
static inline void flush_all_cpus_locked(struct kmem_cache *s) { }
static inline void flush_all(struct kmem_cache *s) { }
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { }
static inline int slub_cpu_dead(unsigned int cpu) { return 0; }
#endif /* CONFIG_SLUB_TINY */
/*
* Check if the objects in a per cpu structure fit numa
* locality expectations.
@ -2859,38 +2885,28 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
}
/* Supports checking bulk free of a constructed freelist */
static noinline void free_debug_processing(
struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr)
static inline bool free_debug_processing(struct kmem_cache *s,
struct slab *slab, void *head, void *tail, int *bulk_cnt,
unsigned long addr, depot_stack_handle_t handle)
{
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
struct slab *slab_free = NULL;
bool checks_ok = false;
void *object = head;
int cnt = 0;
unsigned long flags;
bool checks_ok = false;
depot_stack_handle_t handle = 0;
if (s->flags & SLAB_STORE_USER)
handle = set_track_prepare();
spin_lock_irqsave(&n->list_lock, flags);
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
if (!check_slab(s, slab))
goto out;
}
if (slab->inuse < bulk_cnt) {
if (slab->inuse < *bulk_cnt) {
slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n",
slab->inuse, bulk_cnt);
slab->inuse, *bulk_cnt);
goto out;
}
next_object:
if (++cnt > bulk_cnt)
if (++cnt > *bulk_cnt)
goto out_cnt;
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
@ -2912,61 +2928,22 @@ next_object:
checks_ok = true;
out_cnt:
if (cnt != bulk_cnt)
if (cnt != *bulk_cnt) {
slab_err(s, slab, "Bulk free expected %d objects but found %d\n",
bulk_cnt, cnt);
*bulk_cnt, cnt);
*bulk_cnt = cnt;
}
out:
if (checks_ok) {
void *prior = slab->freelist;
/* Perform the actual freeing while we still hold the locks */
slab->inuse -= cnt;
set_freepointer(s, tail, prior);
slab->freelist = head;
/*
* If the slab is empty, and node's partial list is full,
* it should be discarded anyway no matter it's on full or
* partial list.
*/
if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
slab_free = slab;
if (!prior) {
/* was on full list */
remove_full(s, n, slab);
if (!slab_free) {
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
} else if (slab_free) {
remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
}
}
if (slab_free) {
/*
* Update the counters while still holding n->list_lock to
* prevent spurious validation warnings
*/
dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
}
spin_unlock_irqrestore(&n->list_lock, flags);
if (!checks_ok)
slab_fix(s, "Object at 0x%p not freed", object);
if (slab_free) {
stat(s, FREE_SLAB);
free_slab(s, slab_free);
}
return checks_ok;
}
#endif /* CONFIG_SLUB_DEBUG */
#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
#if defined(CONFIG_SLUB_DEBUG) || defined(SLAB_SUPPORTS_SYSFS)
static unsigned long count_partial(struct kmem_cache_node *n,
int (*get_count)(struct slab *))
{
@ -2980,12 +2957,12 @@ static unsigned long count_partial(struct kmem_cache_node *n,
spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */
#ifdef CONFIG_SLUB_DEBUG
static noinline void
slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
{
#ifdef CONFIG_SLUB_DEBUG
static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
int node;
@ -3016,8 +2993,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
node, nr_slabs, nr_objs, nr_free);
}
#endif
}
#else /* CONFIG_SLUB_DEBUG */
static inline void
slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { }
#endif
static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
{
@ -3027,6 +3007,7 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
return true;
}
#ifndef CONFIG_SLUB_TINY
/*
* Check the slab->freelist and either transfer the freelist to the
* per cpu freelist or deactivate the slab.
@ -3314,45 +3295,13 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
return p;
}
/*
* If the object has been wiped upon free, make sure it's fully initialized by
* zeroing out freelist pointer.
*/
static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
void *obj)
{
if (unlikely(slab_want_init_on_free(s)) && obj)
memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
0, sizeof(void *));
}
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
* overhead for requests that can be satisfied on the fastpath.
*
* The fastpath works by first checking if the lockless freelist can be used.
* If not then __slab_alloc is called for slow processing.
*
* Otherwise we can simply pick the next object from the lockless free list.
*/
static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
struct kmem_cache_cpu *c;
struct slab *slab;
unsigned long tid;
struct obj_cgroup *objcg = NULL;
bool init = false;
s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
if (!s)
return NULL;
object = kfence_alloc(s, orig_size, gfpflags);
if (unlikely(object))
goto out;
void *object;
redo:
/*
@ -3422,6 +3371,75 @@ redo:
stat(s, ALLOC_FASTPATH);
}
return object;
}
#else /* CONFIG_SLUB_TINY */
static void *__slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
struct partial_context pc;
struct slab *slab;
void *object;
pc.flags = gfpflags;
pc.slab = &slab;
pc.orig_size = orig_size;
object = get_partial(s, node, &pc);
if (object)
return object;
slab = new_slab(s, gfpflags, node);
if (unlikely(!slab)) {
slab_out_of_memory(s, gfpflags, node);
return NULL;
}
object = alloc_single_from_new_slab(s, slab, orig_size);
return object;
}
#endif /* CONFIG_SLUB_TINY */
/*
* If the object has been wiped upon free, make sure it's fully initialized by
* zeroing out freelist pointer.
*/
static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
void *obj)
{
if (unlikely(slab_want_init_on_free(s)) && obj)
memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
0, sizeof(void *));
}
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
* overhead for requests that can be satisfied on the fastpath.
*
* The fastpath works by first checking if the lockless freelist can be used.
* If not then __slab_alloc is called for slow processing.
*
* Otherwise we can simply pick the next object from the lockless free list.
*/
static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
struct obj_cgroup *objcg = NULL;
bool init = false;
s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
if (!s)
return NULL;
object = kfence_alloc(s, orig_size, gfpflags);
if (unlikely(object))
goto out;
object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
maybe_wipe_obj_freeptr(s, object);
init = slab_want_init_on_alloc(gfpflags, s);
@ -3435,13 +3453,13 @@ out:
return object;
}
static __always_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags, unsigned long addr, size_t orig_size)
{
return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
}
static __always_inline
static __fastpath_inline
void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags)
{
@ -3483,6 +3501,67 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
static noinline void free_to_partial_list(
struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr)
{
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
struct slab *slab_free = NULL;
int cnt = bulk_cnt;
unsigned long flags;
depot_stack_handle_t handle = 0;
if (s->flags & SLAB_STORE_USER)
handle = set_track_prepare();
spin_lock_irqsave(&n->list_lock, flags);
if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) {
void *prior = slab->freelist;
/* Perform the actual freeing while we still hold the locks */
slab->inuse -= cnt;
set_freepointer(s, tail, prior);
slab->freelist = head;
/*
* If the slab is empty, and node's partial list is full,
* it should be discarded anyway no matter it's on full or
* partial list.
*/
if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
slab_free = slab;
if (!prior) {
/* was on full list */
remove_full(s, n, slab);
if (!slab_free) {
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
} else if (slab_free) {
remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
}
}
if (slab_free) {
/*
* Update the counters while still holding n->list_lock to
* prevent spurious validation warnings
*/
dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
}
spin_unlock_irqrestore(&n->list_lock, flags);
if (slab_free) {
stat(s, FREE_SLAB);
free_slab(s, slab_free);
}
}
/*
* Slow path handling. This may still be called frequently since objects
* have a longer lifetime than the cpu slabs in most processing loads.
@ -3508,8 +3587,8 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
if (kfence_free(head))
return;
if (kmem_cache_debug(s)) {
free_debug_processing(s, slab, head, tail, cnt, addr);
if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
free_to_partial_list(s, slab, head, tail, cnt, addr);
return;
}
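The IS_ENABLED(CONFIG_SLUB_TINY) test above (and the matching one in
get_partial_node() earlier) is a compile-time constant, so a !SLUB_TINY build
pays nothing for routing frees through free_to_partial_list(). A minimal
sketch of the idiom; the helper name is made up:

#include <linux/kconfig.h>
#include <linux/types.h>

/*
 * Hypothetical helper mirroring the pattern used above: when the config
 * option is off, IS_ENABLED() expands to 0 and the compiler drops the
 * branch guarded by it.
 */
static bool wants_single_object_paths(bool cache_is_debug)
{
	return IS_ENABLED(CONFIG_SLUB_TINY) || cache_is_debug;
}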
@ -3609,6 +3688,7 @@ slab_empty:
discard_slab(s, slab);
}
#ifndef CONFIG_SLUB_TINY
/*
* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
* can perform fastpath freeing without additional function calls.
@ -3683,8 +3763,18 @@ redo:
}
stat(s, FREE_FASTPATH);
}
#else /* CONFIG_SLUB_TINY */
static void do_slab_free(struct kmem_cache *s,
struct slab *slab, void *head, void *tail,
int cnt, unsigned long addr)
{
void *tail_obj = tail ? : head;
static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
__slab_free(s, slab, head, tail_obj, cnt, addr);
}
#endif /* CONFIG_SLUB_TINY */
static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, void **p, int cnt,
unsigned long addr)
{
@ -3817,18 +3907,13 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
/* Note that interrupts must be enabled when calling this function. */
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
#ifndef CONFIG_SLUB_TINY
static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
size_t size, void **p, struct obj_cgroup *objcg)
{
struct kmem_cache_cpu *c;
int i;
struct obj_cgroup *objcg = NULL;
/* memcg and kmem_cache debug support */
s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
if (unlikely(!s))
return false;
/*
* Drain objects in the per cpu slab, while disabling local
* IRQs, which protects against PREEMPT and interrupts
@ -3882,18 +3967,71 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
local_unlock_irq(&s->cpu_slab->lock);
slub_put_cpu_ptr(s->cpu_slab);
/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
*/
slab_post_alloc_hook(s, objcg, flags, size, p,
slab_want_init_on_alloc(flags, s), s->object_size);
return i;
error:
slub_put_cpu_ptr(s->cpu_slab);
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
kmem_cache_free_bulk(s, i, p);
return 0;
}
#else /* CONFIG_SLUB_TINY */
static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
size_t size, void **p, struct obj_cgroup *objcg)
{
int i;
for (i = 0; i < size; i++) {
void *object = kfence_alloc(s, s->object_size, flags);
if (unlikely(object)) {
p[i] = object;
continue;
}
p[i] = __slab_alloc_node(s, flags, NUMA_NO_NODE,
_RET_IP_, s->object_size);
if (unlikely(!p[i]))
goto error;
maybe_wipe_obj_freeptr(s, p[i]);
}
return i;
error:
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
kmem_cache_free_bulk(s, i, p);
return 0;
}
#endif /* CONFIG_SLUB_TINY */
/* Note that interrupts must be enabled when calling this function. */
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
int i;
struct obj_cgroup *objcg = NULL;
if (!size)
return 0;
/* memcg and kmem_cache debug support */
s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
if (unlikely(!s))
return 0;
i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg);
/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
*/
if (i != 0)
slab_post_alloc_hook(s, objcg, flags, size, p,
slab_want_init_on_alloc(flags, s), s->object_size);
return i;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
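The refactoring above only moves the per-implementation loop into
__kmem_cache_alloc_bulk(); the exported API is unchanged: it returns the full
count on success, or frees any partial allocation and returns 0. A hedged
caller-side sketch (fill_and_release() and its arguments are illustrative):

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/slab.h>

/* Hedged usage sketch: allocate a batch of objects from a cache created
 * elsewhere, then release the whole batch with a single call. */
static int fill_and_release(struct kmem_cache *cache, void **objs, size_t count)
{
	int n = kmem_cache_alloc_bulk(cache, GFP_KERNEL, count, objs);

	if (!n)		/* a failure already freed any partial allocation */
		return -ENOMEM;

	/* ... use objs[0..n-1] ... */

	kmem_cache_free_bulk(cache, n, objs);
	return 0;
}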
@ -3918,7 +4056,8 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk);
* take the list_lock.
*/
static unsigned int slub_min_order;
static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
static unsigned int slub_max_order =
IS_ENABLED(CONFIG_SLUB_TINY) ? 1 : PAGE_ALLOC_COSTLY_ORDER;
static unsigned int slub_min_objects;
/*
@ -4049,6 +4188,7 @@ init_kmem_cache_node(struct kmem_cache_node *n)
#endif
}
#ifndef CONFIG_SLUB_TINY
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
@ -4069,6 +4209,12 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
return 1;
}
#else
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
return 1;
}
#endif /* CONFIG_SLUB_TINY */
static struct kmem_cache *kmem_cache_node;
@ -4131,7 +4277,9 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
void __kmem_cache_release(struct kmem_cache *s)
{
cache_random_seq_destroy(s);
#ifndef CONFIG_SLUB_TINY
free_percpu(s->cpu_slab);
#endif
free_kmem_cache_nodes(s);
}
@ -4909,8 +5057,10 @@ void __init kmem_cache_init(void)
void __init kmem_cache_init_late(void)
{
#ifndef CONFIG_SLUB_TINY
flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0);
WARN_ON(!flushwq);
#endif
}
struct kmem_cache *
@ -4961,7 +5111,7 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
return 0;
}
#ifdef CONFIG_SYSFS
#ifdef SLAB_SUPPORTS_SYSFS
static int count_inuse(struct slab *slab)
{
return slab->inuse;
@ -5219,7 +5369,7 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s,
#endif /* CONFIG_DEBUG_FS */
#endif /* CONFIG_SLUB_DEBUG */
#ifdef CONFIG_SYSFS
#ifdef SLAB_SUPPORTS_SYSFS
enum slab_stat_type {
SL_ALL, /* All slabs */
SL_PARTIAL, /* Only partially allocated slabs */
@ -5539,11 +5689,13 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
SLAB_ATTR_RO(cache_dma);
#endif
#ifdef CONFIG_HARDENED_USERCOPY
static ssize_t usersize_show(struct kmem_cache *s, char *buf)
{
return sysfs_emit(buf, "%u\n", s->usersize);
}
SLAB_ATTR_RO(usersize);
#endif
static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
{
@ -5854,7 +6006,9 @@ static struct attribute *slab_attrs[] = {
#ifdef CONFIG_FAILSLAB
&failslab_attr.attr,
#endif
#ifdef CONFIG_HARDENED_USERCOPY
&usersize_attr.attr,
#endif
#ifdef CONFIG_KFENCE
&skip_kfence_attr.attr,
#endif
@ -6101,7 +6255,7 @@ static int __init slab_sysfs_init(void)
return 0;
}
late_initcall(slab_sysfs_init);
#endif /* CONFIG_SYSFS */
#endif /* SLAB_SUPPORTS_SYSFS */
#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
static int slab_debugfs_show(struct seq_file *seq, void *v)