dc6c9a35b6
Dave noticed that an unprivileged process can allocate a significant amount of memory -- >500 MiB on x86_64 -- and stay unnoticed by the oom-killer and the memory cgroup. The trick is to allocate a lot of PMD page tables. The Linux kernel doesn't account PMD tables to the process, only PTE tables. The use case below uses a few tricks to allocate a lot of PMD page tables while keeping VmRSS and VmPTE low. oom_score for the process will be 0. #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/mman.h> #include <sys/prctl.h> #define PUD_SIZE (1UL << 30) #define PMD_SIZE (1UL << 21) #define NR_PUD 130000 int main(void) { char *addr = NULL; unsigned long i; prctl(PR_SET_THP_DISABLE); for (i = 0; i < NR_PUD ; i++) { addr = mmap(addr + PUD_SIZE, PUD_SIZE, PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); break; } *addr = 'x'; munmap(addr, PMD_SIZE); mmap(addr, PMD_SIZE, PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, -1, 0); if (addr == MAP_FAILED) perror("re-mmap"), exit(1); } printf("PID %d consumed %lu KiB in PMD page tables\n", getpid(), i * 4096 >> 10); return pause(); } The patch addresses the issue by accounting PMD tables to the process the same way we account PTE tables. The main places where PMD tables are accounted are __pmd_alloc() and free_pmd_range(). But there are a few corner cases: - HugeTLB can share PMD page tables. The patch handles this by accounting the table to all processes that share it. - x86 PAE pre-allocates a few PMD tables on fork. - Architectures with FIRST_USER_ADDRESS > 0. We need to adjust the sanity check on exit(2). Accounting only happens on configurations where the PMD page table level is present (PMD is not folded). As with nr_ptes, we use a per-mm counter. The counter value is used to calculate the baseline for the badness score by the oom-killer. Signed-off-by: Kirill A. 
Shutemov <kirill.shutemov@linux.intel.com> Reported-by: Dave Hansen <dave.hansen@linux.intel.com> Cc: Hugh Dickins <hughd@google.com> Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org> Cc: Pavel Emelyanov <xemul@openvz.org> Cc: David Rientjes <rientjes@google.com> Tested-by: Sedat Dilek <sedat.dilek@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
||
---|---|---|
.. | ||
ABI | ||
DocBook | ||
EDID | ||
PCI | ||
RCU | ||
accounting | ||
acpi | ||
aoe | ||
arm | ||
arm64 | ||
auxdisplay | ||
backlight | ||
blackfin | ||
block | ||
blockdev | ||
bus-devices | ||
cdrom | ||
cgroups | ||
connector | ||
console | ||
cpu-freq | ||
cpuidle | ||
cris | ||
crypto | ||
development-process | ||
device-mapper | ||
devicetree | ||
dmaengine | ||
driver-model | ||
dvb | ||
early-userspace | ||
extcon | ||
fault-injection | ||
fb | ||
filesystems | ||
firmware_class | ||
fmc | ||
frv | ||
gpio | ||
hid | ||
hwmon | ||
i2c | ||
i2o | ||
ia64 | ||
ide | ||
infiniband | ||
input | ||
ioctl | ||
isdn | ||
ja_JP | ||
kbuild | ||
kdump | ||
ko_KR | ||
laptops | ||
leds | ||
locking | ||
m68k | ||
memory-devices | ||
metag | ||
mic | ||
mips | ||
misc-devices | ||
mmc | ||
mn10300 | ||
mtd | ||
namespaces | ||
netlabel | ||
networking | ||
nfc | ||
nios2 | ||
parisc | ||
pcmcia | ||
phy | ||
platform | ||
power | ||
powerpc | ||
pps | ||
prctl | ||
pti | ||
ptp | ||
rapidio | ||
s390 | ||
scheduler | ||
scsi | ||
security | ||
serial | ||
sh | ||
sound | ||
spi | ||
sysctl | ||
target | ||
thermal | ||
timers | ||
tpm | ||
trace | ||
usb | ||
vDSO | ||
video4linux | ||
virtual | ||
vm | ||
w1 | ||
watchdog | ||
wimax | ||
x86 | ||
xtensa | ||
zh_CN | ||
00-INDEX | ||
BUG-HUNTING | ||
Changes | ||
CodingStyle | ||
DMA-API-HOWTO.txt | ||
DMA-API.txt | ||
DMA-ISA-LPC.txt | ||
DMA-attributes.txt | ||
HOWTO | ||
IPMI.txt | ||
IRQ-affinity.txt | ||
IRQ-domain.txt | ||
IRQ.txt | ||
Intel-IOMMU.txt | ||
Makefile | ||
ManagementStyle | ||
SAK.txt | ||
SM501.txt | ||
SecurityBugs | ||
SubmitChecklist | ||
SubmittingDrivers | ||
SubmittingPatches | ||
VGA-softcursor.txt | ||
applying-patches.txt | ||
assoc_array.txt | ||
atomic_ops.txt | ||
bad_memory.txt | ||
basic_profiling.txt | ||
bcache.txt | ||
binfmt_misc.txt | ||
braille-console.txt | ||
bt8xxgpio.txt | ||
btmrvl.txt | ||
bus-virt-phys-mapping.txt | ||
cachetlb.txt | ||
circular-buffers.txt | ||
clk.txt | ||
coccinelle.txt | ||
cpu-hotplug.txt | ||
cpu-load.txt | ||
cputopology.txt | ||
crc32.txt | ||
dcdbas.txt | ||
debugging-modules.txt | ||
debugging-via-ohci1394.txt | ||
dell_rbu.txt | ||
devices.txt | ||
digsig.txt | ||
dma-buf-sharing.txt | ||
dontdiff | ||
dynamic-debug-howto.txt | ||
edac.txt | ||
efi-stub.txt | ||
eisa.txt | ||
email-clients.txt | ||
flexible-arrays.txt | ||
futex-requeue-pi.txt | ||
gcov.txt | ||
highuid.txt | ||
hsi.txt | ||
hw_random.txt | ||
hwspinlock.txt | ||
init.txt | ||
initrd.txt | ||
intel_txt.txt | ||
io-mapping.txt | ||
io_ordering.txt | ||
iostats.txt | ||
irqflags-tracing.txt | ||
isapnp.txt | ||
java.txt | ||
kernel-doc-nano-HOWTO.txt | ||
kernel-docs.txt | ||
kernel-parameters.txt | ||
kernel-per-CPU-kthreads.txt | ||
kmemcheck.txt | ||
kmemleak.txt | ||
kobject.txt | ||
kprobes.txt | ||
kref.txt | ||
kselftest.txt | ||
ldm.txt | ||
local_ops.txt | ||
lockup-watchdogs.txt | ||
logo.gif | ||
logo.txt | ||
lzo.txt | ||
magic-number.txt | ||
mailbox.txt | ||
md.txt | ||
media-framework.txt | ||
memory-barriers.txt | ||
memory-hotplug.txt | ||
module-signing.txt | ||
mono.txt | ||
nommu-mmap.txt | ||
numastat.txt | ||
oops-tracing.txt | ||
padata.txt | ||
parport-lowlevel.txt | ||
parport.txt | ||
percpu-rw-semaphore.txt | ||
phy.txt | ||
pi-futex.txt | ||
pinctrl.txt | ||
pnp.txt | ||
preempt-locking.txt | ||
printk-formats.txt | ||
pwm.txt | ||
ramoops.txt | ||
rbtree.txt | ||
remoteproc.txt | ||
rfkill.txt | ||
robust-futex-ABI.txt | ||
robust-futexes.txt | ||
rpmsg.txt | ||
rtc.txt | ||
serial-console.txt | ||
sgi-ioc4.txt | ||
smsc_ece1099.txt | ||
sparse.txt | ||
stable_api_nonsense.txt | ||
stable_kernel_rules.txt | ||
static-keys.txt | ||
svga.txt | ||
sysfs-rules.txt | ||
sysrq.txt | ||
this_cpu_ops.txt | ||
unaligned-memory-access.txt | ||
unicode.txt | ||
unshare.txt | ||
vfio.txt | ||
vgaarbiter.txt | ||
video-output.txt | ||
vme_api.txt | ||
volatile-considered-harmful.txt | ||
workqueue.txt | ||
xillybus.txt | ||
xz.txt | ||
zorro.txt |