x86: make percpu symbols zerobased on SMP
[ Based on original patch from Christoph Lameter and Mike Travis. ] This patch makes percpu symbols zerobased on x86_64 SMP by adding PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on the percpu output section and using it in vmlinux_64.lds.S. A new PHDR is added as existing ones cannot contain sections near address zero. PERCPU_VADDR() also adds a new symbol __per_cpu_load which always points to the vaddr of the loaded percpu data.init region. The following adjustments have been made to accommodate the address change. * code to locate percpu gdt_page in head_64.S is updated to add the load address to the gdt_page offset. * __per_cpu_load is used in places where access to the init data area is necessary. * pda->data_offset is initialized soon after C code is entered as zero value doesn't work anymore. This patch is mostly taken from Mike Travis' "x86_64: Base percpu variables at zero" patch. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Родитель
a698c823e1
Коммит
3e5d8f9784
|
@ -44,6 +44,8 @@ void __init x86_64_init_pda(void)
|
||||||
{
|
{
|
||||||
_cpu_pda = __cpu_pda;
|
_cpu_pda = __cpu_pda;
|
||||||
cpu_pda(0) = &_boot_cpu_pda;
|
cpu_pda(0) = &_boot_cpu_pda;
|
||||||
|
cpu_pda(0)->data_offset =
|
||||||
|
(unsigned long)(__per_cpu_load - __per_cpu_start);
|
||||||
pda_init(0);
|
pda_init(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -204,6 +204,23 @@ ENTRY(secondary_startup_64)
|
||||||
pushq $0
|
pushq $0
|
||||||
popfq
|
popfq
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
/*
|
||||||
|
* early_gdt_descr_base should point to the gdt_page in static percpu init
|
||||||
|
* data area. Computing this requires two symbols - __per_cpu_load
|
||||||
|
* and per_cpu__gdt_page. As the linker can't do such a relocation, do
|
||||||
|
* it by hand. As early_gdt_descr is manipulated by C code for
|
||||||
|
* secondary CPUs, this should be done only once for the boot CPU
|
||||||
|
* when early_gdt_descr_base contains zero.
|
||||||
|
*/
|
||||||
|
movq early_gdt_descr_base(%rip), %rax
|
||||||
|
testq %rax, %rax
|
||||||
|
jnz 1f
|
||||||
|
movq $__per_cpu_load, %rax
|
||||||
|
addq $per_cpu__gdt_page, %rax
|
||||||
|
movq %rax, early_gdt_descr_base(%rip)
|
||||||
|
1:
|
||||||
|
#endif
|
||||||
/*
|
/*
|
||||||
* We must switch to a new descriptor in kernel space for the GDT
|
* We must switch to a new descriptor in kernel space for the GDT
|
||||||
* because soon the kernel won't have access anymore to the userspace
|
* because soon the kernel won't have access anymore to the userspace
|
||||||
|
@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt)
|
||||||
.globl early_gdt_descr
|
.globl early_gdt_descr
|
||||||
early_gdt_descr:
|
early_gdt_descr:
|
||||||
.word GDT_ENTRIES*8-1
|
.word GDT_ENTRIES*8-1
|
||||||
.quad per_cpu__gdt_page
|
#ifdef CONFIG_SMP
|
||||||
|
early_gdt_descr_base:
|
||||||
|
.quad 0x0000000000000000
|
||||||
|
#else
|
||||||
|
.quad per_cpu__gdt_page
|
||||||
|
#endif
|
||||||
|
|
||||||
ENTRY(phys_base)
|
ENTRY(phys_base)
|
||||||
/* This must match the first entry in level2_kernel_pgt */
|
/* This must match the first entry in level2_kernel_pgt */
|
||||||
|
|
|
@ -213,7 +213,7 @@ void __init setup_per_cpu_areas(void)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
per_cpu_offset(cpu) = ptr - __per_cpu_start;
|
per_cpu_offset(cpu) = ptr - __per_cpu_start;
|
||||||
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
|
memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
|
||||||
|
|
||||||
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
|
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,9 @@ PHDRS {
|
||||||
data PT_LOAD FLAGS(7); /* RWE */
|
data PT_LOAD FLAGS(7); /* RWE */
|
||||||
user PT_LOAD FLAGS(7); /* RWE */
|
user PT_LOAD FLAGS(7); /* RWE */
|
||||||
data.init PT_LOAD FLAGS(7); /* RWE */
|
data.init PT_LOAD FLAGS(7); /* RWE */
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
percpu PT_LOAD FLAGS(7); /* RWE */
|
||||||
|
#endif
|
||||||
note PT_NOTE FLAGS(0); /* ___ */
|
note PT_NOTE FLAGS(0); /* ___ */
|
||||||
}
|
}
|
||||||
SECTIONS
|
SECTIONS
|
||||||
|
@ -208,14 +211,26 @@ SECTIONS
|
||||||
__initramfs_end = .;
|
__initramfs_end = .;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
/*
|
||||||
|
* percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
|
||||||
|
* output PHDR, so the next output section - .data_nosave - should
|
||||||
|
* switch it back to data.init.
|
||||||
|
*/
|
||||||
|
. = ALIGN(PAGE_SIZE);
|
||||||
|
PERCPU_VADDR(0, :percpu)
|
||||||
|
#else
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(PAGE_SIZE)
|
||||||
|
#endif
|
||||||
|
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
__init_end = .;
|
__init_end = .;
|
||||||
|
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
__nosave_begin = .;
|
__nosave_begin = .;
|
||||||
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
|
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
|
||||||
|
*(.data.nosave)
|
||||||
|
} :data.init /* switch back to data.init, see PERCPU_VADDR() above */
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
__nosave_end = .;
|
__nosave_end = .;
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[];
|
||||||
extern char __init_begin[], __init_end[];
|
extern char __init_begin[], __init_end[];
|
||||||
extern char _sinittext[], _einittext[];
|
extern char _sinittext[], _einittext[];
|
||||||
extern char _end[];
|
extern char _end[];
|
||||||
extern char __per_cpu_start[], __per_cpu_end[];
|
extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
|
||||||
extern char __kprobes_text_start[], __kprobes_text_end[];
|
extern char __kprobes_text_start[], __kprobes_text_end[];
|
||||||
extern char __initdata_begin[], __initdata_end[];
|
extern char __initdata_begin[], __initdata_end[];
|
||||||
extern char __start_rodata[], __end_rodata[];
|
extern char __start_rodata[], __end_rodata[];
|
||||||
|
|
|
@ -430,12 +430,51 @@
|
||||||
*(.initcall7.init) \
|
*(.initcall7.init) \
|
||||||
*(.initcall7s.init)
|
*(.initcall7s.init)
|
||||||
|
|
||||||
#define PERCPU(align) \
|
#define PERCPU_PROLOG(vaddr) \
|
||||||
. = ALIGN(align); \
|
VMLINUX_SYMBOL(__per_cpu_load) = .; \
|
||||||
VMLINUX_SYMBOL(__per_cpu_start) = .; \
|
.data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \
|
||||||
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
|
VMLINUX_SYMBOL(__per_cpu_start) = .;
|
||||||
|
|
||||||
|
#define PERCPU_EPILOG(phdr) \
|
||||||
|
VMLINUX_SYMBOL(__per_cpu_end) = .; \
|
||||||
|
} phdr \
|
||||||
|
. = __per_cpu_load + SIZEOF(.data.percpu);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PERCPU_VADDR - define output section for percpu area
|
||||||
|
* @vaddr: explicit base address (optional)
|
||||||
|
* @phdr: destination PHDR (optional)
|
||||||
|
*
|
||||||
|
* Macro which expands to output section for percpu area. If @vaddr
|
||||||
|
* is not blank, it specifies explicit base address and all percpu
|
||||||
|
* symbols will be offset from the given address. If blank, @vaddr
|
||||||
|
* always equals @laddr + LOAD_OFFSET.
|
||||||
|
*
|
||||||
|
* @phdr defines the output PHDR to use if not blank. Be warned that
|
||||||
|
* output PHDR is sticky. If @phdr is specified, the next output
|
||||||
|
* section in the linker script will go there too. @phdr should have
|
||||||
|
* a leading colon.
|
||||||
|
*
|
||||||
|
* This macro defines three symbols, __per_cpu_load, __per_cpu_start
|
||||||
|
* and __per_cpu_end. The first one is the vaddr of loaded percpu
|
||||||
|
* init data. __per_cpu_start equals @vaddr and __per_cpu_end is the
|
||||||
|
* end offset.
|
||||||
|
*/
|
||||||
|
#define PERCPU_VADDR(vaddr, phdr) \
|
||||||
|
PERCPU_PROLOG(vaddr) \
|
||||||
*(.data.percpu.page_aligned) \
|
*(.data.percpu.page_aligned) \
|
||||||
*(.data.percpu) \
|
*(.data.percpu) \
|
||||||
*(.data.percpu.shared_aligned) \
|
*(.data.percpu.shared_aligned) \
|
||||||
} \
|
PERCPU_EPILOG(phdr)
|
||||||
VMLINUX_SYMBOL(__per_cpu_end) = .;
|
|
||||||
|
/**
|
||||||
|
* PERCPU - define output section for percpu area, simple version
|
||||||
|
* @align: required alignment
|
||||||
|
*
|
||||||
|
* Align to @align and outputs output section for percpu area. This
|
||||||
|
* macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and
|
||||||
|
* __per_cpu_start will be identical.
|
||||||
|
*/
|
||||||
|
#define PERCPU(align) \
|
||||||
|
. = ALIGN(align); \
|
||||||
|
PERCPU_VADDR( , )
|
||||||
|
|
Загрузка…
Ссылка в новой задаче