2019-05-27 09:55:01 +03:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* c 2001 PPC 64 Team, IBM Corp
|
|
|
|
*/
|
|
|
|
|
2011-05-10 23:28:52 +04:00
|
|
|
#include <linux/smp.h>
|
2011-07-23 02:24:23 +04:00
|
|
|
#include <linux/export.h>
|
2010-07-12 08:36:09 +04:00
|
|
|
#include <linux/memblock.h>
|
2017-02-04 03:20:53 +03:00
|
|
|
#include <linux/sched/task.h>
|
2019-03-06 02:42:58 +03:00
|
|
|
#include <linux/numa.h>
|
2020-06-09 07:32:42 +03:00
|
|
|
#include <linux/pgtable.h>
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
#include <asm/lppaca.h>
|
|
|
|
#include <asm/paca.h>
|
powerpc: Make the 64-bit kernel as a position-independent executable
This implements CONFIG_RELOCATABLE for 64-bit by making the kernel as
a position-independent executable (PIE) when it is set. This involves
processing the dynamic relocations in the image in the early stages of
booting, even if the kernel is being run at the address it is linked at,
since the linker does not necessarily fill in words in the image for
which there are dynamic relocations. (In fact the linker does fill in
such words for 64-bit executables, though not for 32-bit executables,
so in principle we could avoid calling relocate() entirely when we're
running a 64-bit kernel at the linked address.)
The dynamic relocations are processed by a new function relocate(addr),
where the addr parameter is the virtual address where the image will be
run. In fact we call it twice; once before calling prom_init, and again
when starting the main kernel. This means that reloc_offset() returns
0 in prom_init (since it has been relocated to the address it is running
at), which necessitated a few adjustments.
This also changes __va and __pa to use an equivalent definition that is
simpler. With the relocatable kernel, PAGE_OFFSET and MEMORY_START are
constants (for 64-bit) whereas PHYSICAL_START is a variable (and
KERNELBASE ideally should be too, but isn't yet).
With this, relocatable kernels still copy themselves down to physical
address 0 and run there.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-08-30 05:43:47 +04:00
|
|
|
#include <asm/sections.h>
|
2010-05-13 23:40:11 +04:00
|
|
|
#include <asm/kexec.h>
|
2019-08-20 05:13:18 +03:00
|
|
|
#include <asm/svm.h>
|
|
|
|
#include <asm/ultravisor.h>
|
powerpc/rtas: Implement reentrant rtas call
Implement rtas_call_reentrant() for reentrant rtas-calls:
"ibm,int-on", "ibm,int-off", "ibm,get-xive" and "ibm,set-xive".
On LoPAPR Version 1.1 (March 24, 2016), from 7.3.10.1 to 7.3.10.4,
items 2 and 3 say:
2 - For the PowerPC External Interrupt option: The * call must be
reentrant to the number of processors on the platform.
3 - For the PowerPC External Interrupt option: The * argument call
buffer for each simultaneous call must be physically unique.
So, these rtas-calls can be called in a lockless way, if using
a different buffer for each cpu doing such rtas call.
For this, it was suggested to add the buffer (struct rtas_args)
in the PACA struct, so each cpu can have its own buffer.
The PACA struct received a pointer to rtas buffer, which is
allocated in the memory range available to rtas 32-bit.
Reentrant rtas calls are useful to avoid deadlocks in crashing,
where rtas-calls are needed, but some other thread crashed holding
the rtas.lock.
This is a backtrace of a deadlock from a kdump testing environment:
#0 arch_spin_lock
#1 lock_rtas ()
#2 rtas_call (token=8204, nargs=1, nret=1, outputs=0x0)
#3 ics_rtas_mask_real_irq (hw_irq=4100)
#4 machine_kexec_mask_interrupts
#5 default_machine_crash_shutdown
#6 machine_crash_shutdown
#7 __crash_kexec
#8 crash_kexec
#9 oops_end
Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
[mpe: Move under #ifdef PSERIES to avoid build breakage]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200518234245.200672-3-leobras.c@gmail.com
2020-05-19 02:42:45 +03:00
|
|
|
#include <asm/rtas.h>
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2017-12-22 14:17:13 +03:00
|
|
|
#include "setup.h"
|
|
|
|
|
2018-02-13 18:08:20 +03:00
|
|
|
#ifndef CONFIG_SMP
|
|
|
|
/* Without CONFIG_SMP, boot_cpuid is defined here as the constant 0. */
#define boot_cpuid 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * alloc_paca_data() - allocate early boot memory for per-CPU paca data.
 * @size:  number of bytes to allocate
 * @align: required alignment of the allocation
 * @limit: upper address bound passed to memblock for the allocation
 * @cpu:   CPU the data belongs to
 *
 * For every CPU other than boot_cpuid the memory is requested on the node
 * reported by early_cpu_to_node(). The boot CPU's data is allocated very
 * early, before cpu_to_node is up, so no node is requested for it; instead
 * memblock is switched to bottom-up mode for the duration of the call,
 * because the boot CPU should be on node-0 and bottom-up allocation puts
 * its data in the right place.
 *
 * Panics if the allocation fails, so the returned pointer is never NULL.
 */
static void *__init alloc_paca_data(unsigned long size, unsigned long align,
				unsigned long limit, int cpu)
{
	const int for_boot_cpu = (cpu == boot_cpuid);
	int node = NUMA_NO_NODE;
	void *mem;

	if (for_boot_cpu)
		memblock_set_bottom_up(true);
	else
		node = early_cpu_to_node(cpu);

	mem = memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
				     limit, node);
	if (!mem)
		panic("cannot allocate paca data");

	/* Undo the temporary bottom-up mode selected for the boot CPU. */
	if (for_boot_cpu)
		memblock_set_bottom_up(false);

	return mem;
}
|
|
|
|
|
2018-02-13 18:08:11 +03:00
|
|
|
#ifdef CONFIG_PPC_PSERIES
|
2009-06-03 01:17:41 +04:00
|
|
|
|
2019-08-20 05:13:17 +03:00
|
|
|
/*
 * Bytes reserved for each lppaca (0x400 = 1kB). Used below as the allocation
 * size per CPU and as the value stored in the lppaca's .size field.
 */
#define LPPACA_SIZE 0x400
|
|
|
|
|
2020-06-12 17:29:53 +03:00
|
|
|
/*
 * alloc_shared_lppaca() - carve one lppaca out of a region shared via
 * uv_share_page().
 * @size:  number of bytes to hand out for this lppaca
 * @limit: upper address bound for the backing memblock allocation
 * @cpu:   CPU the lppaca is for (not used by this function)
 *
 * On the first call a single page-aligned region large enough for
 * nr_cpu_ids lppacas of LPPACA_SIZE bytes is allocated bottom-up and passed
 * to uv_share_page(). Every call then returns the next @size bytes of that
 * region.
 *
 * Panics if the region cannot be allocated, and BUGs if more bytes are
 * handed out than the region holds.
 */
static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit,
					int cpu)
{
	static void *pool_base;
	static unsigned long pool_used;
	size_t pool_bytes = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
	void *slot;

	if (!pool_base) {
		memblock_set_bottom_up(true);

		/*
		 * See Documentation/powerpc/ultravisor.rst for more details.
		 *
		 * UV/HV data sharing is in PAGE_SIZE granularity. In order to
		 * minimize the number of pages shared, align the allocation to
		 * PAGE_SIZE.
		 */
		pool_base = memblock_alloc_try_nid(pool_bytes, PAGE_SIZE,
						   MEMBLOCK_LOW_LIMIT, limit,
						   NUMA_NO_NODE);
		if (!pool_base)
			panic("cannot allocate shared data");

		memblock_set_bottom_up(false);
		uv_share_page(PHYS_PFN(__pa(pool_base)),
			      pool_bytes >> PAGE_SHIFT);
	}

	slot = pool_base + pool_used;
	pool_used += size;

	/*
	 * This is very early in boot, so no harm done if the kernel crashes at
	 * this point.
	 */
	BUG_ON(pool_used > pool_bytes);

	return slot;
}
|
|
|
|
|
2006-01-13 02:26:42 +03:00
|
|
|
/*
|
2018-02-13 18:08:13 +03:00
|
|
|
* See asm/lppaca.h for more detail.
|
|
|
|
*
|
|
|
|
* lppaca structures must must be 1kB in size, L1 cache line aligned,
|
|
|
|
* and not cross 4kB boundary. A 1kB size and 1kB alignment will satisfy
|
|
|
|
* these requirements.
|
2006-01-13 02:26:42 +03:00
|
|
|
*/
|
2018-02-13 18:08:13 +03:00
|
|
|
static inline void init_lppaca(struct lppaca *lppaca)
|
|
|
|
{
|
|
|
|
BUILD_BUG_ON(sizeof(struct lppaca) != 640);
|
|
|
|
|
|
|
|
*lppaca = (struct lppaca) {
|
2013-08-06 20:01:46 +04:00
|
|
|
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
|
2019-08-20 05:13:17 +03:00
|
|
|
.size = cpu_to_be16(LPPACA_SIZE),
|
2006-01-13 02:26:42 +03:00
|
|
|
.fpregs_in_use = 1,
|
2013-08-06 20:01:46 +04:00
|
|
|
.slb_count = cpu_to_be16(64),
|
2006-01-13 02:26:42 +03:00
|
|
|
.vmxregs_in_use = 0,
|
2018-02-13 18:08:13 +03:00
|
|
|
.page_ins = 0, };
|
2006-01-13 02:26:42 +03:00
|
|
|
};
|
|
|
|
|
2018-02-13 18:08:13 +03:00
|
|
|
/*
 * new_lppaca() - allocate and initialise the lppaca for one CPU.
 * @cpu:   CPU the lppaca is for
 * @limit: upper address bound for the allocation
 *
 * Returns NULL without allocating when the CPU_FTR_HVMODE feature is set.
 * Otherwise the lppaca comes from alloc_shared_lppaca() for a secure guest
 * and from alloc_paca_data() (1kB aligned) in every other case, and is
 * filled in by init_lppaca() before being returned.
 */
static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
	struct lppaca *fresh;

	BUILD_BUG_ON(sizeof(struct lppaca) > LPPACA_SIZE);

	if (early_cpu_has_feature(CPU_FTR_HVMODE))
		return NULL;

	fresh = is_secure_guest() ?
		alloc_shared_lppaca(LPPACA_SIZE, limit, cpu) :
		alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);

	init_lppaca(fresh);

	return fresh;
}
|
2019-08-20 05:13:17 +03:00
|
|
|
#endif /* CONFIG_PPC_PSERIES */
|
2009-06-03 01:17:41 +04:00
|
|
|
|
2017-10-19 07:08:43 +03:00
|
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
2009-06-03 01:17:41 +04:00
|
|
|
|
2006-08-07 10:19:19 +04:00
|
|
|
/*
|
2018-02-13 18:08:14 +03:00
|
|
|
* 3 persistent SLBs are allocated here. The buffer will be zero
|
2006-08-07 10:19:19 +04:00
|
|
|
* initially, hence will all be invaild until we actually write them.
|
2014-05-15 16:38:03 +04:00
|
|
|
*
|
|
|
|
* If you make the number of persistent SLB entries dynamic, please also
|
|
|
|
* update PR KVM to flush and restore them accordingly.
|
2006-08-07 10:19:19 +04:00
|
|
|
*/
|
2018-02-13 18:08:14 +03:00
|
|
|
static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
|
2013-12-05 07:42:40 +04:00
|
|
|
{
|
2017-08-13 04:33:43 +03:00
|
|
|
struct slb_shadow *s;
|
|
|
|
|
2018-02-13 18:08:14 +03:00
|
|
|
if (cpu != boot_cpuid) {
|
|
|
|
/*
|
|
|
|
* Boot CPU comes here before early_radix_enabled
|
|
|
|
* is parsed (e.g., for disable_radix). So allocate
|
|
|
|
* always and this will be fixed up in free_unused_pacas.
|
|
|
|
*/
|
|
|
|
if (early_radix_enabled())
|
|
|
|
return NULL;
|
|
|
|
}
|
2013-12-05 07:42:40 +04:00
|
|
|
|
2018-02-13 18:08:20 +03:00
|
|
|
s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
|
2015-01-08 08:40:51 +03:00
|
|
|
|
2013-12-05 07:42:40 +04:00
|
|
|
s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
|
|
|
|
s->buffer_length = cpu_to_be32(sizeof(*s));
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2017-10-19 07:08:43 +03:00
|
|
|
#endif /* CONFIG_PPC_BOOK3S_64 */
|
2009-06-03 01:17:41 +04:00
|
|
|
|
powerpc/rtas: Implement reentrant rtas call
Implement rtas_call_reentrant() for reentrant rtas-calls:
"ibm,int-on", "ibm,int-off", "ibm,get-xive" and "ibm,set-xive".
On LoPAPR Version 1.1 (March 24, 2016), from 7.3.10.1 to 7.3.10.4,
items 2 and 3 say:
2 - For the PowerPC External Interrupt option: The * call must be
reentrant to the number of processors on the platform.
3 - For the PowerPC External Interrupt option: The * argument call
buffer for each simultaneous call must be physically unique.
So, these rtas-calls can be called in a lockless way, if using
a different buffer for each cpu doing such rtas call.
For this, it was suggested to add the buffer (struct rtas_args)
in the PACA struct, so each cpu can have its own buffer.
The PACA struct received a pointer to rtas buffer, which is
allocated in the memory range available to rtas 32-bit.
Reentrant rtas calls are useful to avoid deadlocks in crashing,
where rtas-calls are needed, but some other thread crashed holding
the rtas.lock.
This is a backtrace of a deadlock from a kdump testing environment:
#0 arch_spin_lock
#1 lock_rtas ()
#2 rtas_call (token=8204, nargs=1, nret=1, outputs=0x0)
#3 ics_rtas_mask_real_irq (hw_irq=4100)
#4 machine_kexec_mask_interrupts
#5 default_machine_crash_shutdown
#6 machine_crash_shutdown
#7 __crash_kexec
#8 crash_kexec
#9 oops_end
Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
[mpe: Move under #ifdef PSERIES to avoid build breakage]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200518234245.200672-3-leobras.c@gmail.com
2020-05-19 02:42:45 +03:00
|
|
|
#ifdef CONFIG_PPC_PSERIES
|
|
|
|
/**
|
|
|
|
* new_rtas_args() - Allocates rtas args
|
|
|
|
* @cpu: CPU number
|
|
|
|
* @limit: Memory limit for this allocation
|
|
|
|
*
|
|
|
|
* Allocates a struct rtas_args and return it's pointer,
|
|
|
|
* if not in Hypervisor mode
|
|
|
|
*
|
|
|
|
* Return: Pointer to allocated rtas_args
|
|
|
|
* NULL if CPU in Hypervisor Mode
|
|
|
|
*/
|
|
|
|
static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit)
|
|
|
|
{
|
|
|
|
limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX);
|
|
|
|
|
|
|
|
if (early_cpu_has_feature(CPU_FTR_HVMODE))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES,
|
|
|
|
limit, cpu);
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_PPC_PSERIES */
|
|
|
|
|
2005-11-09 05:38:01 +03:00
|
|
|
/* The Paca is an array with one entry per processor. Each contains an
|
2005-04-17 02:20:36 +04:00
|
|
|
* lppaca, which contains the information shared between the
|
2005-11-24 08:34:45 +03:00
|
|
|
* hypervisor and Linux.
|
2005-04-17 02:20:36 +04:00
|
|
|
* On systems with hardware multi-threading, there are two threads
|
|
|
|
* per processor. The Paca array must contain an entry for each thread.
|
|
|
|
* The VPD Areas will give a max logical processors = 2 * max physical
|
|
|
|
* processors. The processor VPD array needs one entry per physical
|
|
|
|
* processor (not thread).
|
|
|
|
*/
|
2018-02-13 18:08:12 +03:00
|
|
|
/*
 * Array of pointers to the per-CPU pacas, indexed by CPU number. The array
 * is allocated by allocate_paca_ptrs() and each slot is filled in by
 * allocate_paca().
 */
struct paca_struct **paca_ptrs __read_mostly;
|
|
|
|
EXPORT_SYMBOL(paca_ptrs);
|
2008-04-24 07:43:49 +04:00
|
|
|
|
2020-10-28 11:04:33 +03:00
|
|
|
/*
 * initialise_paca() - give a paca its initial field values.
 * @new_paca: paca to initialise
 * @cpu:      CPU number, stored as the paca index
 *
 * Only writes fields of @new_paca; the pointers to separately allocated
 * data (lppaca, SLB shadow, reentrant RTAS args) start out NULL here.
 */
void __init initialise_paca(struct paca_struct *new_paca, int cpu)
{
	/* Fields set regardless of platform configuration. */
	new_paca->lock_token = 0x8000;
	new_paca->paca_index = cpu;
	new_paca->kernel_toc = kernel_toc_addr();
	new_paca->kernelbase = (unsigned long) _stext;
	/* Only set MSR:IR/DR when MMU is initialized */
	new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);
	new_paca->hw_cpu_id = 0xffff;
	new_paca->kexec_state = KEXEC_STATE_NONE;
	new_paca->__current = &init_task;
	new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;

#ifdef CONFIG_PPC_PSERIES
	new_paca->lppaca_ptr = NULL;
	new_paca->rtas_args_reentrant = NULL;
#endif

#ifdef CONFIG_PPC_BOOK3S_64
	new_paca->slb_shadow_ptr = NULL;
#endif

#ifdef CONFIG_PPC_BOOK3E
	new_paca->kernel_pgd = swapper_pg_dir;
	/* For now -- if we have threads this will be adjusted later */
	new_paca->tcd_ptr = &new_paca->tcd;
#endif
}
|
|
|
|
|
2010-07-08 01:55:37 +04:00
|
|
|
/* Put the paca pointer into r13 and SPRG_PACA */
|
2020-10-28 11:04:33 +03:00
|
|
|
void setup_paca(struct paca_struct *new_paca)
|
2010-07-08 01:55:37 +04:00
|
|
|
{
|
2011-01-20 09:50:21 +03:00
|
|
|
/* Setup r13 */
|
2010-07-08 01:55:37 +04:00
|
|
|
local_paca = new_paca;
|
2011-01-20 09:50:21 +03:00
|
|
|
|
2010-07-08 01:55:37 +04:00
|
|
|
#ifdef CONFIG_PPC_BOOK3E
|
2011-01-20 09:50:21 +03:00
|
|
|
/* On Book3E, initialize the TLB miss exception frames */
|
2010-07-08 01:55:37 +04:00
|
|
|
mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
|
2011-01-20 09:50:21 +03:00
|
|
|
#else
|
powerpc/64: Setup a paca before parsing device tree etc.
Currently we set up the paca after parsing the device tree for CPU
features. Prior to that, r13 contains random data, which means there
is random data in r13 while we're running the generic dt parsing code.
This random data varies depending on whether we boot through a vmlinux
or a zImage: for the vmlinux case it's usually around zero, but for
zImages we see random values like 912a72603d420015.
This is poor practice, and can also lead to difficult-to-debug
crashes. For example, when kcov is enabled, the kcov instrumentation
attempts to read preempt_count out of the current task, which goes via
the paca. This then crashes in the zImage case.
Similarly stack protector can cause crashes if r13 is bogus, by
reading from the stack canary in the paca.
To resolve this:
- move the paca setup to before the CPU feature parsing.
- because we no longer have access to CPU feature flags in paca
setup, change the HV feature test in the paca setup path to consider
the actual value of the MSR rather than the CPU feature.
Translations get switched on once we leave early_setup, so I think
we'd already catch any other cases where the paca or task aren't set
up.
Boot tested on a P9 guest and host.
Fixes: fb0b0a73b223 ("powerpc: Enable kcov")
Fixes: 06ec27aea9fc ("powerpc/64: add stack protector support")
Cc: stable@vger.kernel.org # v4.20+
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Daniel Axtens <dja@axtens.net>
[mpe: Reword comments & change log a bit to mention stack protector]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200320032116.1024773-1-mpe@ellerman.id.au
2020-03-20 06:21:15 +03:00
|
|
|
/*
|
|
|
|
* In HV mode, we setup both HPACA and PACA to avoid problems
|
2011-01-20 09:50:21 +03:00
|
|
|
* if we do a GET_PACA() before the feature fixups have been
|
powerpc/64: Setup a paca before parsing device tree etc.
Currently we set up the paca after parsing the device tree for CPU
features. Prior to that, r13 contains random data, which means there
is random data in r13 while we're running the generic dt parsing code.
This random data varies depending on whether we boot through a vmlinux
or a zImage: for the vmlinux case it's usually around zero, but for
zImages we see random values like 912a72603d420015.
This is poor practice, and can also lead to difficult-to-debug
crashes. For example, when kcov is enabled, the kcov instrumentation
attempts to read preempt_count out of the current task, which goes via
the paca. This then crashes in the zImage case.
Similarly stack protector can cause crashes if r13 is bogus, by
reading from the stack canary in the paca.
To resolve this:
- move the paca setup to before the CPU feature parsing.
- because we no longer have access to CPU feature flags in paca
setup, change the HV feature test in the paca setup path to consider
the actual value of the MSR rather than the CPU feature.
Translations get switched on once we leave early_setup, so I think
we'd already catch any other cases where the paca or task aren't set
up.
Boot tested on a P9 guest and host.
Fixes: fb0b0a73b223 ("powerpc: Enable kcov")
Fixes: 06ec27aea9fc ("powerpc/64: add stack protector support")
Cc: stable@vger.kernel.org # v4.20+
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Daniel Axtens <dja@axtens.net>
[mpe: Reword comments & change log a bit to mention stack protector]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200320032116.1024773-1-mpe@ellerman.id.au
2020-03-20 06:21:15 +03:00
|
|
|
* applied.
|
|
|
|
*
|
|
|
|
* Normally you should test against CPU_FTR_HVMODE, but CPU features
|
|
|
|
* are not yet set up when we first reach here.
|
2011-01-20 09:50:21 +03:00
|
|
|
*/
|
powerpc/64: Setup a paca before parsing device tree etc.
Currently we set up the paca after parsing the device tree for CPU
features. Prior to that, r13 contains random data, which means there
is random data in r13 while we're running the generic dt parsing code.
This random data varies depending on whether we boot through a vmlinux
or a zImage: for the vmlinux case it's usually around zero, but for
zImages we see random values like 912a72603d420015.
This is poor practice, and can also lead to difficult-to-debug
crashes. For example, when kcov is enabled, the kcov instrumentation
attempts to read preempt_count out of the current task, which goes via
the paca. This then crashes in the zImage case.
Similarly stack protector can cause crashes if r13 is bogus, by
reading from the stack canary in the paca.
To resolve this:
- move the paca setup to before the CPU feature parsing.
- because we no longer have access to CPU feature flags in paca
setup, change the HV feature test in the paca setup path to consider
the actual value of the MSR rather than the CPU feature.
Translations get switched on once we leave early_setup, so I think
we'd already catch any other cases where the paca or task aren't set
up.
Boot tested on a P9 guest and host.
Fixes: fb0b0a73b223 ("powerpc: Enable kcov")
Fixes: 06ec27aea9fc ("powerpc/64: add stack protector support")
Cc: stable@vger.kernel.org # v4.20+
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Daniel Axtens <dja@axtens.net>
[mpe: Reword comments & change log a bit to mention stack protector]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200320032116.1024773-1-mpe@ellerman.id.au
2020-03-20 06:21:15 +03:00
|
|
|
if (mfmsr() & MSR_HV)
|
2011-01-20 09:50:21 +03:00
|
|
|
mtspr(SPRN_SPRG_HPACA, local_paca);
|
2010-07-08 01:55:37 +04:00
|
|
|
#endif
|
2011-01-20 09:50:21 +03:00
|
|
|
mtspr(SPRN_SPRG_PACA, local_paca);
|
|
|
|
|
2010-07-08 01:55:37 +04:00
|
|
|
}
|
|
|
|
|
2018-02-13 18:08:12 +03:00
|
|
|
/* Value of nr_cpu_ids the paca pointer array is currently sized for. */
static int __initdata paca_nr_cpu_ids;
|
|
|
|
/* Size in bytes of the paca_ptrs array. */
static int __initdata paca_ptrs_size;
|
2018-02-13 18:08:19 +03:00
|
|
|
/*
 * Running total of bytes allocated for paca structs; grows by
 * sizeof(struct paca_struct) on each allocate_paca() call and is reported
 * by free_unused_pacas().
 */
static int __initdata paca_struct_size;
|
|
|
|
|
|
|
|
/*
 * allocate_paca_ptrs() - allocate the array of per-CPU paca pointers.
 *
 * Sizes the array for the current nr_cpu_ids, records that count and the
 * array size in bytes, and panics if the allocation fails.
 */
void __init allocate_paca_ptrs(void)
{
	paca_nr_cpu_ids = nr_cpu_ids;
	paca_ptrs_size = sizeof(*paca_ptrs) * nr_cpu_ids;

	paca_ptrs = memblock_alloc_raw(paca_ptrs_size, SMP_CACHE_BYTES);
	if (!paca_ptrs) {
		panic("Failed to allocate %d bytes for paca pointers\n",
		      paca_ptrs_size);
	}

	/*
	 * NOTE(review): the 0x88 fill looks like a poison pattern so that
	 * slots not yet set by allocate_paca() are not valid pointers --
	 * confirm.
	 */
	memset(paca_ptrs, 0x88, paca_ptrs_size);
}
|
2010-01-28 16:23:22 +03:00
|
|
|
|
2018-02-13 18:08:19 +03:00
|
|
|
/*
 * allocate_paca() - allocate and initialise the paca for one CPU.
 * @cpu: CPU number; must be below the count the pointer array was sized for
 *
 * Allocates the paca itself, publishes it in paca_ptrs[], initialises it,
 * and then allocates the configuration-dependent side structures (lppaca,
 * SLB shadow, reentrant RTAS args), all below the same address limit.
 */
void __init allocate_paca(int cpu)
{
	struct paca_struct *paca;
	u64 limit;

	BUG_ON(cpu >= paca_nr_cpu_ids);

#ifdef CONFIG_PPC_BOOK3S_64
	/*
	 * We access pacas in real mode, and cannot take SLB faults
	 * on them when in virtual mode, so allocate them accordingly.
	 */
	limit = min(ppc64_bolted_size(), ppc64_rma_size);
#else
	limit = ppc64_rma_size;
#endif

	paca = alloc_paca_data(sizeof(*paca), L1_CACHE_BYTES, limit, cpu);
	paca_ptrs[cpu] = paca;

	initialise_paca(paca, cpu);

	/* The side allocations below are made in this fixed order. */
#ifdef CONFIG_PPC_PSERIES
	paca->lppaca_ptr = new_lppaca(cpu, limit);
#endif
#ifdef CONFIG_PPC_BOOK3S_64
	paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
#endif
#ifdef CONFIG_PPC_PSERIES
	paca->rtas_args_reentrant = new_rtas_args(cpu, limit);
#endif

	/* Accounting only; reported later by free_unused_pacas(). */
	paca_struct_size += sizeof(*paca);
}
|
|
|
|
|
|
|
|
/*
 * free_unused_pacas() - release paca memory that turned out to be unneeded.
 *
 * If nr_cpu_ids has shrunk since allocate_paca_ptrs(), the tail of the
 * paca_ptrs array is returned to memblock and the recorded count and size
 * are updated. With radix enabled, the boot CPU's SLB shadow buffer is
 * freed as well. Finally the total paca memory in use is logged.
 */
void __init free_unused_pacas(void)
{
	int trimmed_size = sizeof(struct paca_struct *) * nr_cpu_ids;
	int surplus = paca_ptrs_size - trimmed_size;

	if (surplus > 0)
		memblock_free(__pa(paca_ptrs) + trimmed_size, surplus);

	paca_nr_cpu_ids = nr_cpu_ids;
	paca_ptrs_size = trimmed_size;

#ifdef CONFIG_PPC_BOOK3S_64
	if (early_radix_enabled()) {
		struct paca_struct *boot_paca = paca_ptrs[boot_cpuid];

		/* Ugly fixup, see new_slb_shadow() */
		memblock_free(__pa(boot_paca->slb_shadow_ptr),
			      sizeof(struct slb_shadow));
		boot_paca->slb_shadow_ptr = NULL;
	}
#endif

	printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
	       paca_ptrs_size + paca_struct_size, nr_cpu_ids);
}
|
2017-03-22 06:36:49 +03:00
|
|
|
|
|
|
|
/*
 * copy_mm_to_paca() - copy per-mm context data into the current paca.
 * @mm: mm whose context is copied
 *
 * With CONFIG_PPC_MM_SLICES the low and high slice page-size arrays are
 * copied; otherwise the user page size and sllp value are copied. Does
 * nothing unless CONFIG_PPC_BOOK3S is set.
 */
void copy_mm_to_paca(struct mm_struct *mm)
{
#ifdef CONFIG_PPC_BOOK3S
	mm_context_t *ctx = &mm->context;

#ifndef CONFIG_PPC_MM_SLICES
	get_paca()->mm_ctx_user_psize = ctx->user_psize;
	get_paca()->mm_ctx_sllp = ctx->sllp;
#else /* CONFIG_PPC_MM_SLICES */
	VM_BUG_ON(!mm_ctx_slb_addr_limit(ctx));
	memcpy(&get_paca()->mm_ctx_low_slices_psize,
	       mm_ctx_low_slices(ctx), LOW_SLICE_ARRAY_SZ);
	memcpy(&get_paca()->mm_ctx_high_slices_psize,
	       mm_ctx_high_slices(ctx), TASK_SLICE_ARRAY_SZ(ctx));
#endif
#endif /* CONFIG_PPC_BOOK3S */
}
|