ef62c8ff1d
Patch series "mm/page_owner: Extend page_owner to show memcg information", v4. While debugging the constant increase in percpu memory consumption on a system that spawned a large number of containers, it was found that a lot of offline mem_cgroup structures remained in place without being freed. Further investigation indicated that those mem_cgroup structures were pinned by some pages. In order to find out what those pages are, the existing page_owner debugging tool is extended to show memory cgroup information and whether those memcgs are offline or not. With the enhanced page_owner tool, the following is a typical page that pinned the mem_cgroup structure in my test case: Page allocated via order 0, mask 0x1100cca(GFP_HIGHUSER_MOVABLE), pid 162970 (podman), ts 1097761405537 ns, free_ts 1097760838089 ns PFN 1925700 type Movable Block 3761 type Movable Flags 0x17ffffc00c001c(uptodate|dirty|lru|reclaim|swapbacked|node=0|zone=2|lastcpupid=0x1fffff) prep_new_page+0xac/0xe0 get_page_from_freelist+0x1327/0x14d0 __alloc_pages+0x191/0x340 alloc_pages_vma+0x84/0x250 shmem_alloc_page+0x3f/0x90 shmem_alloc_and_acct_page+0x76/0x1c0 shmem_getpage_gfp+0x281/0x940 shmem_write_begin+0x36/0xe0 generic_perform_write+0xed/0x1d0 __generic_file_write_iter+0xdc/0x1b0 generic_file_write_iter+0x5d/0xb0 new_sync_write+0x11f/0x1b0 vfs_write+0x1ba/0x2a0 ksys_write+0x59/0xd0 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Charged to offline memcg libpod-conmon-15e4f9c758422306b73b2dd99f9d50a5ea53cbb16b4a13a2c2308a4253cc0ec8. So the page was not freed because it was part of a shmem segment. That is useful information that can help users to diagnose similar problems. With cgroup v1, /proc/cgroups can be read to find out the total number of memory cgroups (online + offline). With cgroup v2, the cgroup.stat of the root cgroup can be read to find the number of dying cgroups (most likely pinned by dying memcgs). 
The page_owner feature is not supposed to be enabled on production systems due to its memory overhead. However, if it is suspected that dying memcgs are increasing over time, a test environment with page_owner enabled can then be set up with appropriate workload for further analysis on what may be causing the increasing number of dying memcgs. This patch (of 4): For *scnprintf(), vsnprintf() is always called even if the input size is 0. That is a waste of time, so just return 0 in this case. Note that vsnprintf() will never return -1 to indicate an error. So skipping the call to vsnprintf() when size is 0 will have no functional impact at all. Link: https://lkml.kernel.org/r/20220202203036.744010-1-longman@redhat.com Link: https://lkml.kernel.org/r/20220202203036.744010-2-longman@redhat.com Signed-off-by: Waiman Long <longman@redhat.com> Acked-by: David Rientjes <rientjes@google.com> Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> Acked-by: Roman Gushchin <guro@fb.com> Acked-by: Rafael Aquini <aquini@redhat.com> Acked-by: Mike Rapoport <rppt@linux.ibm.com> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Steven Rostedt (Google) <rostedt@goodmis.org> Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk> Cc: Ira Weiny <ira.weiny@intel.com> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
||
---|---|---|
.. | ||
842 | ||
crypto | ||
dim | ||
fonts | ||
kunit | ||
livepatch | ||
lz4 | ||
lzo | ||
math | ||
mpi | ||
pldmfw | ||
raid6 | ||
reed_solomon | ||
test_fortify | ||
vdso | ||
xz | ||
zlib_deflate | ||
zlib_dfltcc | ||
zlib_inflate | ||
zstd | ||
.gitignore | ||
Kconfig | ||
Kconfig.debug | ||
Kconfig.kasan | ||
Kconfig.kcsan | ||
Kconfig.kfence | ||
Kconfig.kgdb | ||
Kconfig.ubsan | ||
Makefile | ||
argv_split.c | ||
ashldi3.c | ||
ashrdi3.c | ||
asn1_decoder.c | ||
asn1_encoder.c | ||
assoc_array.c | ||
atomic64.c | ||
atomic64_test.c | ||
audit.c | ||
bcd.c | ||
bch.c | ||
bitfield_kunit.c | ||
bitmap.c | ||
bitrev.c | ||
bootconfig.c | ||
bsearch.c | ||
btree.c | ||
bucket_locks.c | ||
bug.c | ||
build_OID_registry | ||
buildid.c | ||
bust_spinlocks.c | ||
check_signature.c | ||
checksum.c | ||
clz_ctz.c | ||
clz_tab.c | ||
cmdline.c | ||
cmdline_kunit.c | ||
cmpdi2.c | ||
compat_audit.c | ||
cpu_rmap.c | ||
cpumask.c | ||
crc-ccitt.c | ||
crc-itu-t.c | ||
crc-t10dif.c | ||
crc4.c | ||
crc7.c | ||
crc8.c | ||
crc16.c | ||
crc32.c | ||
crc32defs.h | ||
crc32test.c | ||
crc64.c | ||
ctype.c | ||
debug_info.c | ||
debug_locks.c | ||
debugobjects.c | ||
dec_and_lock.c | ||
decompress.c | ||
decompress_bunzip2.c | ||
decompress_inflate.c | ||
decompress_unlz4.c | ||
decompress_unlzma.c | ||
decompress_unlzo.c | ||
decompress_unxz.c | ||
decompress_unzstd.c | ||
devmem_is_allowed.c | ||
devres.c | ||
digsig.c | ||
dump_stack.c | ||
dynamic_debug.c | ||
dynamic_queue_limits.c | ||
earlycpio.c | ||
errname.c | ||
error-inject.c | ||
errseq.c | ||
extable.c | ||
fault-inject-usercopy.c | ||
fault-inject.c | ||
fdt.c | ||
fdt_addresses.c | ||
fdt_empty_tree.c | ||
fdt_ro.c | ||
fdt_rw.c | ||
fdt_strerror.c | ||
fdt_sw.c | ||
fdt_wip.c | ||
find_bit.c | ||
find_bit_benchmark.c | ||
flex_proportions.c | ||
gen_crc32table.c | ||
gen_crc64table.c | ||
genalloc.c | ||
generic-radix-tree.c | ||
glob.c | ||
globtest.c | ||
hexdump.c | ||
hweight.c | ||
idr.c | ||
inflate.c | ||
interval_tree.c | ||
interval_tree_test.c | ||
iomap.c | ||
iomap_copy.c | ||
iommu-helper.c | ||
iov_iter.c | ||
irq_poll.c | ||
irq_regs.c | ||
is_single_threaded.c | ||
kasprintf.c | ||
kfifo.c | ||
klist.c | ||
kobject.c | ||
kobject_uevent.c | ||
kstrtox.c | ||
kstrtox.h | ||
libcrc32c.c | ||
linear_ranges.c | ||
list-test.c | ||
list_debug.c | ||
list_sort.c | ||
llist.c | ||
locking-selftest-hardirq.h | ||
locking-selftest-mutex.h | ||
locking-selftest-rlock-hardirq.h | ||
locking-selftest-rlock-softirq.h | ||
locking-selftest-rlock.h | ||
locking-selftest-rsem.h | ||
locking-selftest-rtmutex.h | ||
locking-selftest-softirq.h | ||
locking-selftest-spin-hardirq.h | ||
locking-selftest-spin-softirq.h | ||
locking-selftest-spin.h | ||
locking-selftest-wlock-hardirq.h | ||
locking-selftest-wlock-softirq.h | ||
locking-selftest-wlock.h | ||
locking-selftest-wsem.h | ||
locking-selftest.c | ||
lockref.c | ||
logic_iomem.c | ||
logic_pio.c | ||
lru_cache.c | ||
lshrdi3.c | ||
memcat_p.c | ||
memcpy_kunit.c | ||
memory-notifier-error-inject.c | ||
memregion.c | ||
memweight.c | ||
muldi3.c | ||
net_utils.c | ||
netdev-notifier-error-inject.c | ||
nlattr.c | ||
nmi_backtrace.c | ||
nodemask.c | ||
notifier-error-inject.c | ||
notifier-error-inject.h | ||
objagg.c | ||
of-reconfig-notifier-error-inject.c | ||
oid_registry.c | ||
once.c | ||
overflow_kunit.c | ||
packing.c | ||
parman.c | ||
parser.c | ||
pci_iomap.c | ||
percpu-refcount.c | ||
percpu_counter.c | ||
percpu_test.c | ||
plist.c | ||
pm-notifier-error-inject.c | ||
radix-tree.c | ||
random32.c | ||
ratelimit.c | ||
rbtree.c | ||
rbtree_test.c | ||
ref_tracker.c | ||
refcount.c | ||
rhashtable.c | ||
sbitmap.c | ||
scatterlist.c | ||
seq_buf.c | ||
sg_pool.c | ||
sg_split.c | ||
sha1.c | ||
show_mem.c | ||
siphash.c | ||
slub_kunit.c | ||
smp_processor_id.c | ||
sort.c | ||
stackdepot.c | ||
stackinit_kunit.c | ||
stmp_device.c | ||
string.c | ||
string_helpers.c | ||
strncpy_from_user.c | ||
strnlen_user.c | ||
syscall.c | ||
test-kstrtox.c | ||
test-string_helpers.c | ||
test_bitmap.c | ||
test_bitops.c | ||
test_bits.c | ||
test_blackhole_dev.c | ||
test_bpf.c | ||
test_debug_virtual.c | ||
test_firmware.c | ||
test_fprobe.c | ||
test_fpu.c | ||
test_free_pages.c | ||
test_hash.c | ||
test_hexdump.c | ||
test_hmm.c | ||
test_hmm_uapi.h | ||
test_ida.c | ||
test_kasan.c | ||
test_kasan_module.c | ||
test_kmod.c | ||
test_kprobes.c | ||
test_linear_ranges.c | ||
test_list_sort.c | ||
test_lockup.c | ||
test_memcat_p.c | ||
test_meminit.c | ||
test_min_heap.c | ||
test_module.c | ||
test_objagg.c | ||
test_parman.c | ||
test_printf.c | ||
test_ref_tracker.c | ||
test_rhashtable.c | ||
test_scanf.c | ||
test_siphash.c | ||
test_sort.c | ||
test_static_key_base.c | ||
test_static_keys.c | ||
test_string.c | ||
test_strscpy.c | ||
test_sysctl.c | ||
test_ubsan.c | ||
test_user_copy.c | ||
test_uuid.c | ||
test_vmalloc.c | ||
test_xarray.c | ||
textsearch.c | ||
timerqueue.c | ||
ts_bm.c | ||
ts_fsm.c | ||
ts_kmp.c | ||
ubsan.c | ||
ubsan.h | ||
ucmpdi2.c | ||
ucs2_string.c | ||
usercopy.c | ||
uuid.c | ||
vsprintf.c | ||
win_minmax.c | ||
xarray.c | ||
xxhash.c |