[PATCH] s390: fix memory holes and cleanup setup_arch
The memory setup did not account for memory holes, which made the memory management believe that more memory was available than actually exists. As a result, the OOM killer killed processes even when enough memory was left that could have been written to swap space. The patch fixes this by calling free_area_init_node with an array of memory holes instead of free_area_init. In addition, the patch cleans up the code in setup.c by splitting setup_arch into smaller pieces. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Родитель
4b7e070662
Коммит
c9e3735359
|
@ -60,6 +60,8 @@ struct {
|
|||
#define CHUNK_READ_WRITE 0
|
||||
#define CHUNK_READ_ONLY 1
|
||||
volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
|
||||
unsigned long __initdata zholes_size[MAX_NR_ZONES];
|
||||
static unsigned long __initdata memory_end;
|
||||
|
||||
/*
|
||||
* Setup options
|
||||
|
@ -78,11 +80,15 @@ static char command_line[COMMAND_LINE_SIZE] = { 0, };
|
|||
|
||||
static struct resource code_resource = {
|
||||
.name = "Kernel code",
|
||||
.start = (unsigned long) &_text,
|
||||
.end = (unsigned long) &_etext - 1,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
|
||||
};
|
||||
|
||||
static struct resource data_resource = {
|
||||
.name = "Kernel data",
|
||||
.start = (unsigned long) &_etext,
|
||||
.end = (unsigned long) &_edata - 1,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
|
||||
};
|
||||
|
||||
|
@ -310,90 +316,50 @@ void machine_power_off(void)
|
|||
|
||||
EXPORT_SYMBOL(machine_power_off);
|
||||
|
||||
/*
|
||||
* Setup function called from init/main.c just after the banner
|
||||
* was printed.
|
||||
*/
|
||||
extern char _pstart, _pend, _stext;
|
||||
|
||||
void __init setup_arch(char **cmdline_p)
|
||||
static void __init
|
||||
add_memory_hole(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long bootmap_size;
|
||||
unsigned long memory_start, memory_end;
|
||||
char c = ' ', cn, *to = command_line, *from = COMMAND_LINE;
|
||||
unsigned long start_pfn, end_pfn;
|
||||
static unsigned int smptrap=0;
|
||||
unsigned long delay = 0;
|
||||
struct _lowcore *lc;
|
||||
int i;
|
||||
unsigned long dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
|
||||
|
||||
if (smptrap)
|
||||
return;
|
||||
smptrap=1;
|
||||
if (end <= dma_pfn)
|
||||
zholes_size[ZONE_DMA] += end - start + 1;
|
||||
else if (start > dma_pfn)
|
||||
zholes_size[ZONE_NORMAL] += end - start + 1;
|
||||
else {
|
||||
zholes_size[ZONE_DMA] += dma_pfn - start + 1;
|
||||
zholes_size[ZONE_NORMAL] += end - dma_pfn;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* print what head.S has found out about the machine
|
||||
*/
|
||||
#ifndef CONFIG_ARCH_S390X
|
||||
printk((MACHINE_IS_VM) ?
|
||||
"We are running under VM (31 bit mode)\n" :
|
||||
"We are running native (31 bit mode)\n");
|
||||
printk((MACHINE_HAS_IEEE) ?
|
||||
"This machine has an IEEE fpu\n" :
|
||||
"This machine has no IEEE fpu\n");
|
||||
#else /* CONFIG_ARCH_S390X */
|
||||
printk((MACHINE_IS_VM) ?
|
||||
"We are running under VM (64 bit mode)\n" :
|
||||
"We are running native (64 bit mode)\n");
|
||||
#endif /* CONFIG_ARCH_S390X */
|
||||
static void __init
|
||||
parse_cmdline_early(char **cmdline_p)
|
||||
{
|
||||
char c = ' ', cn, *to = command_line, *from = COMMAND_LINE;
|
||||
unsigned long delay = 0;
|
||||
|
||||
ROOT_DEV = Root_RAM0;
|
||||
memory_start = (unsigned long) &_end; /* fixit if use $CODELO etc*/
|
||||
#ifndef CONFIG_ARCH_S390X
|
||||
memory_end = memory_size & ~0x400000UL; /* align memory end to 4MB */
|
||||
/*
|
||||
* We need some free virtual space to be able to do vmalloc.
|
||||
* On a machine with 2GB memory we make sure that we have at
|
||||
* least 128 MB free space for vmalloc.
|
||||
*/
|
||||
if (memory_end > 1920*1024*1024)
|
||||
memory_end = 1920*1024*1024;
|
||||
#else /* CONFIG_ARCH_S390X */
|
||||
memory_end = memory_size & ~0x200000UL; /* detected in head.s */
|
||||
#endif /* CONFIG_ARCH_S390X */
|
||||
init_mm.start_code = PAGE_OFFSET;
|
||||
init_mm.end_code = (unsigned long) &_etext;
|
||||
init_mm.end_data = (unsigned long) &_edata;
|
||||
init_mm.brk = (unsigned long) &_end;
|
||||
/* Save unparsed command line copy for /proc/cmdline */
|
||||
memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
|
||||
saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
|
||||
|
||||
code_resource.start = (unsigned long) &_text;
|
||||
code_resource.end = (unsigned long) &_etext - 1;
|
||||
data_resource.start = (unsigned long) &_etext;
|
||||
data_resource.end = (unsigned long) &_edata - 1;
|
||||
|
||||
/* Save unparsed command line copy for /proc/cmdline */
|
||||
memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
|
||||
saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
|
||||
|
||||
for (;;) {
|
||||
/*
|
||||
* "mem=XXX[kKmM]" sets memsize
|
||||
*/
|
||||
if (c == ' ' && strncmp(from, "mem=", 4) == 0) {
|
||||
memory_end = simple_strtoul(from+4, &from, 0);
|
||||
if ( *from == 'K' || *from == 'k' ) {
|
||||
memory_end = memory_end << 10;
|
||||
from++;
|
||||
} else if ( *from == 'M' || *from == 'm' ) {
|
||||
memory_end = memory_end << 20;
|
||||
from++;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes
|
||||
*/
|
||||
if (c == ' ' && strncmp(from, "ipldelay=", 9) == 0) {
|
||||
delay = simple_strtoul(from+9, &from, 0);
|
||||
for (;;) {
|
||||
/*
|
||||
* "mem=XXX[kKmM]" sets memsize
|
||||
*/
|
||||
if (c == ' ' && strncmp(from, "mem=", 4) == 0) {
|
||||
memory_end = simple_strtoul(from+4, &from, 0);
|
||||
if ( *from == 'K' || *from == 'k' ) {
|
||||
memory_end = memory_end << 10;
|
||||
from++;
|
||||
} else if ( *from == 'M' || *from == 'm' ) {
|
||||
memory_end = memory_end << 20;
|
||||
from++;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes
|
||||
*/
|
||||
if (c == ' ' && strncmp(from, "ipldelay=", 9) == 0) {
|
||||
delay = simple_strtoul(from+9, &from, 0);
|
||||
if (*from == 's' || *from == 'S') {
|
||||
delay = delay*1000000;
|
||||
from++;
|
||||
|
@ -403,115 +369,39 @@ void __init setup_arch(char **cmdline_p)
|
|||
}
|
||||
/* now wait for the requested amount of time */
|
||||
udelay(delay);
|
||||
}
|
||||
cn = *(from++);
|
||||
if (!cn)
|
||||
break;
|
||||
if (cn == '\n')
|
||||
cn = ' '; /* replace newlines with space */
|
||||
}
|
||||
cn = *(from++);
|
||||
if (!cn)
|
||||
break;
|
||||
if (cn == '\n')
|
||||
cn = ' '; /* replace newlines with space */
|
||||
if (cn == 0x0d)
|
||||
cn = ' '; /* replace 0x0d with space */
|
||||
if (cn == ' ' && c == ' ')
|
||||
continue; /* remove additional spaces */
|
||||
c = cn;
|
||||
if (to - command_line >= COMMAND_LINE_SIZE)
|
||||
break;
|
||||
*(to++) = c;
|
||||
}
|
||||
if (c == ' ' && to > command_line) to--;
|
||||
*to = '\0';
|
||||
*cmdline_p = command_line;
|
||||
|
||||
/*
|
||||
* partially used pages are not usable - thus
|
||||
* we are rounding upwards:
|
||||
*/
|
||||
start_pfn = (__pa(&_end) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
end_pfn = max_pfn = memory_end >> PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* Initialize the boot-time allocator (with low memory only):
|
||||
*/
|
||||
bootmap_size = init_bootmem(start_pfn, end_pfn);
|
||||
|
||||
/*
|
||||
* Register RAM areas with the bootmem allocator.
|
||||
*/
|
||||
for (i = 0; i < 16 && memory_chunk[i].size > 0; i++) {
|
||||
unsigned long start_chunk, end_chunk;
|
||||
|
||||
if (memory_chunk[i].type != CHUNK_READ_WRITE)
|
||||
continue;
|
||||
start_chunk = (memory_chunk[i].addr + PAGE_SIZE - 1);
|
||||
start_chunk >>= PAGE_SHIFT;
|
||||
end_chunk = (memory_chunk[i].addr + memory_chunk[i].size);
|
||||
end_chunk >>= PAGE_SHIFT;
|
||||
if (start_chunk < start_pfn)
|
||||
start_chunk = start_pfn;
|
||||
if (end_chunk > end_pfn)
|
||||
end_chunk = end_pfn;
|
||||
if (start_chunk < end_chunk)
|
||||
free_bootmem(start_chunk << PAGE_SHIFT,
|
||||
(end_chunk - start_chunk) << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reserve the bootmem bitmap itself as well. We do this in two
|
||||
* steps (first step was init_bootmem()) because this catches
|
||||
* the (very unlikely) case of us accidentally initializing the
|
||||
* bootmem allocator with an invalid RAM area.
|
||||
*/
|
||||
reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INITRD
|
||||
if (INITRD_START) {
|
||||
if (INITRD_START + INITRD_SIZE <= memory_end) {
|
||||
reserve_bootmem(INITRD_START, INITRD_SIZE);
|
||||
initrd_start = INITRD_START;
|
||||
initrd_end = initrd_start + INITRD_SIZE;
|
||||
} else {
|
||||
printk("initrd extends beyond end of memory "
|
||||
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
|
||||
initrd_start + INITRD_SIZE, memory_end);
|
||||
initrd_start = initrd_end = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (i = 0; i < 16 && memory_chunk[i].size > 0; i++) {
|
||||
struct resource *res;
|
||||
|
||||
res = alloc_bootmem_low(sizeof(struct resource));
|
||||
res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
|
||||
|
||||
switch (memory_chunk[i].type) {
|
||||
case CHUNK_READ_WRITE:
|
||||
res->name = "System RAM";
|
||||
if (cn == ' ' && c == ' ')
|
||||
continue; /* remove additional spaces */
|
||||
c = cn;
|
||||
if (to - command_line >= COMMAND_LINE_SIZE)
|
||||
break;
|
||||
case CHUNK_READ_ONLY:
|
||||
res->name = "System ROM";
|
||||
res->flags |= IORESOURCE_READONLY;
|
||||
break;
|
||||
default:
|
||||
res->name = "reserved";
|
||||
}
|
||||
res->start = memory_chunk[i].addr;
|
||||
res->end = memory_chunk[i].addr + memory_chunk[i].size - 1;
|
||||
request_resource(&iomem_resource, res);
|
||||
request_resource(res, &code_resource);
|
||||
request_resource(res, &data_resource);
|
||||
*(to++) = c;
|
||||
}
|
||||
if (c == ' ' && to > command_line) to--;
|
||||
*to = '\0';
|
||||
*cmdline_p = command_line;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup lowcore for boot cpu
|
||||
*/
|
||||
#ifndef CONFIG_ARCH_S390X
|
||||
lc = (struct _lowcore *) __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0);
|
||||
memset(lc, 0, PAGE_SIZE);
|
||||
#else /* CONFIG_ARCH_S390X */
|
||||
lc = (struct _lowcore *) __alloc_bootmem(2*PAGE_SIZE, 2*PAGE_SIZE, 0);
|
||||
memset(lc, 0, 2*PAGE_SIZE);
|
||||
#endif /* CONFIG_ARCH_S390X */
|
||||
static void __init
|
||||
setup_lowcore(void)
|
||||
{
|
||||
struct _lowcore *lc;
|
||||
int lc_pages;
|
||||
|
||||
/*
|
||||
* Setup lowcore for boot cpu
|
||||
*/
|
||||
lc_pages = sizeof(void *) == 8 ? 2 : 1;
|
||||
lc = (struct _lowcore *)
|
||||
__alloc_bootmem(lc_pages * PAGE_SIZE, lc_pages * PAGE_SIZE, 0);
|
||||
memset(lc, 0, lc_pages * PAGE_SIZE);
|
||||
lc->restart_psw.mask = PSW_BASE_BITS;
|
||||
lc->restart_psw.addr =
|
||||
PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
|
||||
|
@ -546,6 +436,159 @@ void __init setup_arch(char **cmdline_p)
|
|||
lc->diag44_opcode = 0x07000700;
|
||||
#endif /* CONFIG_ARCH_S390X */
|
||||
set_prefix((u32)(unsigned long) lc);
|
||||
}
|
||||
|
||||
static void __init
|
||||
setup_resources(void)
|
||||
{
|
||||
struct resource *res;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
|
||||
res = alloc_bootmem_low(sizeof(struct resource));
|
||||
res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
|
||||
switch (memory_chunk[i].type) {
|
||||
case CHUNK_READ_WRITE:
|
||||
res->name = "System RAM";
|
||||
break;
|
||||
case CHUNK_READ_ONLY:
|
||||
res->name = "System ROM";
|
||||
res->flags |= IORESOURCE_READONLY;
|
||||
break;
|
||||
default:
|
||||
res->name = "reserved";
|
||||
}
|
||||
res->start = memory_chunk[i].addr;
|
||||
res->end = memory_chunk[i].addr + memory_chunk[i].size - 1;
|
||||
request_resource(&iomem_resource, res);
|
||||
request_resource(res, &code_resource);
|
||||
request_resource(res, &data_resource);
|
||||
}
|
||||
}
|
||||
|
||||
static void __init
|
||||
setup_memory(void)
|
||||
{
|
||||
unsigned long bootmap_size;
|
||||
unsigned long start_pfn, end_pfn;
|
||||
unsigned long last_rw_end;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* partially used pages are not usable - thus
|
||||
* we are rounding upwards:
|
||||
*/
|
||||
start_pfn = (__pa(&_end) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
end_pfn = max_pfn = memory_end >> PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* Initialize the boot-time allocator (with low memory only):
|
||||
*/
|
||||
bootmap_size = init_bootmem(start_pfn, end_pfn);
|
||||
|
||||
/*
|
||||
* Register RAM areas with the bootmem allocator.
|
||||
*/
|
||||
last_rw_end = start_pfn;
|
||||
|
||||
for (i = 0; i < 16 && memory_chunk[i].size > 0; i++) {
|
||||
unsigned long start_chunk, end_chunk;
|
||||
|
||||
if (memory_chunk[i].type != CHUNK_READ_WRITE)
|
||||
continue;
|
||||
start_chunk = (memory_chunk[i].addr + PAGE_SIZE - 1);
|
||||
start_chunk >>= PAGE_SHIFT;
|
||||
end_chunk = (memory_chunk[i].addr + memory_chunk[i].size);
|
||||
end_chunk >>= PAGE_SHIFT;
|
||||
if (start_chunk < start_pfn)
|
||||
start_chunk = start_pfn;
|
||||
if (end_chunk > end_pfn)
|
||||
end_chunk = end_pfn;
|
||||
if (start_chunk < end_chunk) {
|
||||
free_bootmem(start_chunk << PAGE_SHIFT,
|
||||
(end_chunk - start_chunk) << PAGE_SHIFT);
|
||||
if (last_rw_end < start_chunk)
|
||||
add_memory_hole(last_rw_end, start_chunk - 1);
|
||||
last_rw_end = end_chunk;
|
||||
}
|
||||
}
|
||||
|
||||
if (last_rw_end < end_pfn - 1)
|
||||
add_memory_hole(last_rw_end, end_pfn - 1);
|
||||
|
||||
/*
|
||||
* Reserve the bootmem bitmap itself as well. We do this in two
|
||||
* steps (first step was init_bootmem()) because this catches
|
||||
* the (very unlikely) case of us accidentally initializing the
|
||||
* bootmem allocator with an invalid RAM area.
|
||||
*/
|
||||
reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INITRD
|
||||
if (INITRD_START) {
|
||||
if (INITRD_START + INITRD_SIZE <= memory_end) {
|
||||
reserve_bootmem(INITRD_START, INITRD_SIZE);
|
||||
initrd_start = INITRD_START;
|
||||
initrd_end = initrd_start + INITRD_SIZE;
|
||||
} else {
|
||||
printk("initrd extends beyond end of memory "
|
||||
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
|
||||
initrd_start + INITRD_SIZE, memory_end);
|
||||
initrd_start = initrd_end = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup function called from init/main.c just after the banner
|
||||
* was printed.
|
||||
*/
|
||||
|
||||
void __init
|
||||
setup_arch(char **cmdline_p)
|
||||
{
|
||||
/*
|
||||
* print what head.S has found out about the machine
|
||||
*/
|
||||
#ifndef CONFIG_ARCH_S390X
|
||||
printk((MACHINE_IS_VM) ?
|
||||
"We are running under VM (31 bit mode)\n" :
|
||||
"We are running native (31 bit mode)\n");
|
||||
printk((MACHINE_HAS_IEEE) ?
|
||||
"This machine has an IEEE fpu\n" :
|
||||
"This machine has no IEEE fpu\n");
|
||||
#else /* CONFIG_ARCH_S390X */
|
||||
printk((MACHINE_IS_VM) ?
|
||||
"We are running under VM (64 bit mode)\n" :
|
||||
"We are running native (64 bit mode)\n");
|
||||
#endif /* CONFIG_ARCH_S390X */
|
||||
|
||||
ROOT_DEV = Root_RAM0;
|
||||
#ifndef CONFIG_ARCH_S390X
|
||||
memory_end = memory_size & ~0x400000UL; /* align memory end to 4MB */
|
||||
/*
|
||||
* We need some free virtual space to be able to do vmalloc.
|
||||
* On a machine with 2GB memory we make sure that we have at
|
||||
* least 128 MB free space for vmalloc.
|
||||
*/
|
||||
if (memory_end > 1920*1024*1024)
|
||||
memory_end = 1920*1024*1024;
|
||||
#else /* CONFIG_ARCH_S390X */
|
||||
memory_end = memory_size & ~0x200000UL; /* detected in head.s */
|
||||
#endif /* CONFIG_ARCH_S390X */
|
||||
|
||||
init_mm.start_code = PAGE_OFFSET;
|
||||
init_mm.end_code = (unsigned long) &_etext;
|
||||
init_mm.end_data = (unsigned long) &_edata;
|
||||
init_mm.brk = (unsigned long) &_end;
|
||||
|
||||
parse_cmdline_early(cmdline_p);
|
||||
|
||||
setup_memory();
|
||||
setup_resources();
|
||||
setup_lowcore();
|
||||
|
||||
cpu_init();
|
||||
__cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
|
||||
|
||||
|
|
|
@ -101,6 +101,7 @@ extern unsigned long _end;
|
|||
extern unsigned long __init_begin;
|
||||
extern unsigned long __init_end;
|
||||
|
||||
extern unsigned long __initdata zholes_size[];
|
||||
/*
|
||||
* paging_init() sets up the page tables
|
||||
*/
|
||||
|
@ -163,10 +164,13 @@ void __init paging_init(void)
|
|||
local_flush_tlb();
|
||||
|
||||
{
|
||||
unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0};
|
||||
unsigned long zones_size[MAX_NR_ZONES];
|
||||
|
||||
memset(zones_size, 0, sizeof(zones_size));
|
||||
zones_size[ZONE_DMA] = max_low_pfn;
|
||||
free_area_init(zones_size);
|
||||
free_area_init_node(0, &contig_page_data, zones_size,
|
||||
__pa(PAGE_OFFSET) >> PAGE_SHIFT,
|
||||
zholes_size);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -184,9 +188,10 @@ void __init paging_init(void)
|
|||
_KERN_REGION_TABLE;
|
||||
static const int ssm_mask = 0x04000000L;
|
||||
|
||||
unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
|
||||
unsigned long zones_size[MAX_NR_ZONES];
|
||||
unsigned long dma_pfn, high_pfn;
|
||||
|
||||
memset(zones_size, 0, sizeof(zones_size));
|
||||
dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
|
||||
high_pfn = max_low_pfn;
|
||||
|
||||
|
@ -198,8 +203,8 @@ void __init paging_init(void)
|
|||
}
|
||||
|
||||
/* Initialize mem_map[]. */
|
||||
free_area_init(zones_size);
|
||||
|
||||
free_area_init_node(0, &contig_page_data, zones_size,
|
||||
__pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
|
||||
|
||||
/*
|
||||
* map whole physical memory to virtual memory (identity mapping)
|
||||
|
|
Загрузка…
Ссылка в новой задаче